nice clean recipes pear.dunkirk.sh
recipes
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: add new recipe site

+239 -9
+7 -1
internal/extract/pipeline.go
··· 41 41 return &Result{Recipe: recipe} 42 42 } 43 43 44 + if recipe, ok := schema.ExtractMicrodata(body); ok { 45 + recipe.SourceURL = targetURL 46 + recipe.SourceDomain = domainOf(targetURL) 47 + return &Result{Recipe: recipe} 48 + } 49 + 44 50 if recipe, ok := hrecipe.Extract(body); ok { 45 51 recipe.SourceURL = targetURL 46 52 recipe.SourceDomain = domainOf(targetURL) 47 53 return &Result{Recipe: recipe} 48 54 } 49 55 50 - return &Result{Error: fmt.Errorf("no recipe found on page — tried JSON-LD and h-recipe extraction")} 56 + return &Result{Error: fmt.Errorf("no recipe found on page — tried JSON-LD, microdata, and h-recipe extraction")} 51 57 } 52 58 53 59 func (p *Pipeline) fetch(url string) (string, error) {
+211
internal/extract/schema/microdata.go
··· 1 + package schema 2 + 3 + import ( 4 + "fmt" 5 + "strings" 6 + 7 + "tangled.org/dunkirk.sh/pare/internal/models" 8 + 9 + "golang.org/x/net/html" 10 + ) 11 + 12 + func ExtractMicrodata(body string) (*models.Recipe, bool) { 13 + doc, err := html.Parse(strings.NewReader(body)) 14 + if err != nil { 15 + return nil, false 16 + } 17 + 18 + recipeNode := findMicrodataRecipe(doc) 19 + if recipeNode == nil { 20 + return nil, false 21 + } 22 + 23 + recipe := &models.Recipe{ExtractionMethod: "schema.org"} 24 + 25 + if name := getMicrodataProp(recipeNode, "name"); name != "" { 26 + recipe.Name = name 27 + } 28 + if desc := getMicrodataProp(recipeNode, "description"); desc != "" { 29 + recipe.Description = desc 30 + } 31 + if img := getMicrodataImageProp(recipeNode); img != "" { 32 + recipe.ImageURL = img 33 + } 34 + if y := getMicrodataProp(recipeNode, "recipeYield"); y != "" { 35 + recipe.Yield = cleanYield(y) 36 + } 37 + if pt := getMicrodataProp(recipeNode, "prepTime"); pt != "" { 38 + recipe.PrepTime = pt 39 + } 40 + if ct := getMicrodataProp(recipeNode, "cookTime"); ct != "" { 41 + recipe.CookTime = ct 42 + } 43 + if tt := getMicrodataProp(recipeNode, "totalTime"); tt != "" { 44 + recipe.TotalTime = tt 45 + } 46 + 47 + for _, ing := range getAllMicrodataProps(recipeNode, "ingredients") { 48 + recipe.Ingredients = append(recipe.Ingredients, parseIngredient(ing)) 49 + } 50 + // Also check "recipeIngredient" since some pages use that 51 + for _, ing := range getAllMicrodataProps(recipeNode, "recipeIngredient") { 52 + recipe.Ingredients = append(recipe.Ingredients, parseIngredient(ing)) 53 + } 54 + 55 + for _, instr := range getAllMicrodataProps(recipeNode, "recipeInstructions") { 56 + instr = strings.TrimSpace(instr) 57 + if instr != "" { 58 + recipe.Instructions = append(recipe.Instructions, models.Instruction{Text: instr}) 59 + } 60 + } 61 + 62 + if recipe.Yield != "" { 63 + fmt.Sscanf(recipe.Yield, "%d", &recipe.Servings) 64 + } 65 + 66 + if recipe.Name == "" { 67 + return nil, false 68 + } 69 + 70 + return recipe, true 71 + } 72 + 73 + func findMicrodataRecipe(n *html.Node) *html.Node { 74 + if n.Type == html.ElementNode { 75 + for _, attr := range n.Attr { 76 + if attr.Key == "itemtype" { 77 + typ := strings.TrimSpace(attr.Val) 78 + if typ == "http://schema.org/Recipe" || typ == "https://schema.org/Recipe" { 79 + return n 80 + } 81 + } 82 + } 83 + } 84 + for c := n.FirstChild; c != nil; c = c.NextSibling { 85 + if found := findMicrodataRecipe(c); found != nil { 86 + return found 87 + } 88 + } 89 + return nil 90 + } 91 + 92 + func getMicrodataProp(n *html.Node, prop string) string { 93 + var result string 94 + var f func(*html.Node) 95 + f = func(node *html.Node) { 96 + if result != "" { 97 + return 98 + } 99 + if node.Type == html.ElementNode { 100 + for _, attr := range node.Attr { 101 + if attr.Key == "itemprop" && attr.Val == prop { 102 + // For img elements, use src/content 103 + if node.Data == "img" { 104 + result = getAttrVal(node, "src") 105 + if result == "" { 106 + result = getAttrVal(node, "content") 107 + } 108 + } else if node.Data == "meta" { 109 + result = getAttrVal(node, "content") 110 + } else if node.Data == "time" { 111 + result = getAttrVal(node, "datetime") 112 + if result == "" { 113 + result = textContent(node) 114 + } 115 + } else if node.Data == "link" { 116 + result = getAttrVal(node, "href") 117 + } else { 118 + result = textContent(node) 119 + } 120 + return 121 + } 122 + } 123 + } 124 + for c := node.FirstChild; c != nil; c = c.NextSibling { 125 + f(c) 126 + } 127 + } 128 + f(n) 129 + return strings.TrimSpace(result) 130 + } 131 + 132 + func getMicrodataImageProp(n *html.Node) string { 133 + var f func(*html.Node) string 134 + f = func(node *html.Node) string { 135 + if node.Type == html.ElementNode { 136 + for _, attr := range node.Attr { 137 + if attr.Key == "itemprop" && (attr.Val == "image") { 138 + if node.Data == "img" { 139 + if src := getAttrVal(node, "src"); src != "" { 140 + return src 141 + } 142 + } 143 + if href := getAttrVal(node, "href"); href != "" { 144 + return href 145 + } 146 + if content := getAttrVal(node, "content"); content != "" { 147 + return content 148 + } 149 + } 150 + } 151 + } 152 + for c := node.FirstChild; c != nil; c = c.NextSibling { 153 + if found := f(c); found != "" { 154 + return found 155 + } 156 + } 157 + return "" 158 + } 159 + return f(n) 160 + } 161 + 162 + func getAllMicrodataProps(n *html.Node, prop string) []string { 163 + var results []string 164 + var f func(*html.Node) 165 + f = func(node *html.Node) { 166 + if node.Type == html.ElementNode { 167 + for _, attr := range node.Attr { 168 + if attr.Key == "itemprop" && attr.Val == prop { 169 + text := "" 170 + if node.Data == "img" { 171 + text = getAttrVal(node, "src") 172 + } else if node.Data == "meta" { 173 + text = getAttrVal(node, "content") 174 + } else { 175 + text = textContent(node) 176 + } 177 + text = strings.TrimSpace(text) 178 + if text != "" { 179 + results = append(results, text) 180 + } 181 + return 182 + } 183 + } 184 + } 185 + for c := node.FirstChild; c != nil; c = c.NextSibling { 186 + f(c) 187 + } 188 + } 189 + f(n) 190 + return results 191 + } 192 + 193 + func getAttrVal(n *html.Node, key string) string { 194 + for _, attr := range n.Attr { 195 + if attr.Key == key { 196 + return attr.Val 197 + } 198 + } 199 + return "" 200 + } 201 + 202 + func textContent(n *html.Node) string { 203 + if n.Type == html.TextNode { 204 + return n.Data 205 + } 206 + var sb strings.Builder 207 + for c := n.FirstChild; c != nil; c = c.NextSibling { 208 + sb.WriteString(textContent(c)) 209 + } 210 + return strings.TrimSpace(sb.String()) 211 + }
+3
main.go
··· 232 232 if path == "/" { 233 233 path = "" 234 234 } 235 + if len(path) > 35 { 236 + path = path[:32] + "…" 237 + } 235 238 return map[string]string{"host": host, "path": path} 236 239 } 237 240
+16 -6
ui/static/style.css
··· 128 128 font-size:0.8rem; 129 129 margin-top:0.35rem; 130 130 font-family:'Poppins',system-ui,sans-serif; 131 + overflow:hidden; 132 + text-overflow:ellipsis; 133 + white-space:nowrap; 131 134 } 132 135 .recipe-source a svg{ 133 136 margin-right:0.25rem; ··· 304 307 .btn-secondary:hover{border-color:var(--accent);text-decoration:none} 305 308 306 309 .error-box{ 307 - background:var(--error-bg); 308 - color:var(--error-text); 309 - padding:1.5rem; 310 - border-radius:var(--radius); 311 310 text-align:center; 311 + padding:3rem 1.5rem; 312 312 } 313 - .error-box h3{margin-bottom:0.5rem} 314 - .error-box p{font-size:0.9rem;margin-bottom:1rem} 313 + .error-box h3{ 314 + font-family:'Poppins',system-ui,sans-serif; 315 + font-size:1.1rem; 316 + font-weight:600; 317 + color:var(--text); 318 + margin-bottom:0.5rem; 319 + } 320 + .error-box p{ 321 + font-size:0.9rem; 322 + color:var(--text-muted); 323 + margin-bottom:1.25rem; 324 + } 315 325 316 326 .bookmarklet{ 317 327 margin-top:2rem;
+1 -1
ui/templates/index.html
··· 4 4 <head> 5 5 <meta charset="utf-8"> 6 6 <meta name="viewport" content="width=device-width,initial-scale=1"> 7 - <title>pare</title> 7 + <title>pare: nice recipes</title> 8 8 <link rel="icon" type="image/svg+xml" href="/static/favicon.svg"> 9 9 <link rel="stylesheet" href="/static/style.css"> 10 10 </head>
+1 -1
ui/templates/recipe.html
··· 4 4 <head> 5 5 <meta charset="utf-8"> 6 6 <meta name="viewport" content="width=device-width,initial-scale=1"> 7 - <title>pare - {{.Recipe.Name}}</title> 7 + <title>{{.Recipe.Name}}</title> 8 8 <link rel="icon" type="image/svg+xml" href="/static/favicon.svg"> 9 9 <link rel="stylesheet" href="/static/style.css"> 10 10 </head>