nice clean recipes pear.dunkirk.sh
recipes
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: add FlareSolverr support and handle JSON-LD @type arrays

+83 -4
+68 -2
internal/extract/pipeline.go
··· 1 1 package extract 2 2 3 3 import ( 4 + "bytes" 5 + "encoding/json" 4 6 "fmt" 5 7 "io" 6 8 "net/http" 9 + "os" 7 10 "strings" 8 11 "time" 9 12 ··· 13 16 ) 14 17 15 18 type Pipeline struct { 16 - client *http.Client 19 + client *http.Client 20 + flareSolverURL string 17 21 } 18 22 19 23 func NewPipeline() *Pipeline { ··· 21 25 client: &http.Client{ 22 26 Timeout: 15 * time.Second, 23 27 }, 28 + flareSolverURL: func() string { 29 + v := os.Getenv("FLARESOLVERR_URL") 30 + if v == "0" || v == "" { 31 + return "" 32 + } 33 + if v == "1" { 34 + return "http://localhost:8191/v1" 35 + } 36 + return v 37 + }(), 24 38 } 25 39 } 26 40 ··· 32 46 func (p *Pipeline) Extract(targetURL string) *Result { 33 47 body, err := p.fetch(targetURL) 34 48 if err != nil { 35 - return &Result{Error: fmt.Errorf("fetching page: %w", err)} 49 + if p.flareSolverURL != "" { 50 + flareBody, flareErr := p.fetchViaFlareSolver(targetURL) 51 + if flareErr != nil { 52 + return &Result{Error: fmt.Errorf("fetching page: %w (flaresolverr: %v)", err, flareErr)} 53 + } 54 + body = flareBody 55 + } else { 56 + return &Result{Error: fmt.Errorf("fetching page: %w", err)} 57 + } 36 58 } 37 59 38 60 if recipe, ok := schema.Extract(body); ok { ··· 81 103 } 82 104 83 105 return string(body), nil 106 + } 107 + 108 + func (p *Pipeline) fetchViaFlareSolver(targetURL string) (string, error) { 109 + reqBody, _ := json.Marshal(map[string]any{ 110 + "cmd": "request.get", 111 + "url": targetURL, 112 + "maxTimeout": 60000, 113 + }) 114 + 115 + req, err := http.NewRequest("POST", p.flareSolverURL, bytes.NewReader(reqBody)) 116 + if err != nil { 117 + return "", fmt.Errorf("creating flaresolverr request: %w", err) 118 + } 119 + req.Header.Set("Content-Type", "application/json") 120 + 121 + client := &http.Client{Timeout: 90 * time.Second} 122 + resp, err := client.Do(req) 123 + if err != nil { 124 + return "", fmt.Errorf("flaresolverr request: %w", err) 125 + } 126 + defer resp.Body.Close() 127 + 128 + body, err := 
io.ReadAll(resp.Body) 129 + if err != nil { 130 + return "", fmt.Errorf("reading flaresolverr response: %w", err) 131 + } 132 + 133 + var result struct { 134 + Status string `json:"status"` 135 + Message string `json:"message"` 136 + Solution struct { 137 + Response string `json:"response"` 138 + Status int `json:"status"` 139 + } `json:"solution"` 140 + } 141 + if err := json.Unmarshal(body, &result); err != nil { 142 + return "", fmt.Errorf("parsing flaresolverr response: %w", err) 143 + } 144 + 145 + if result.Status != "ok" { 146 + return "", fmt.Errorf("flaresolverr: %s", result.Message) 147 + } 148 + 149 + return result.Solution.Response, nil 84 150 } 85 151 86 152 func domainOf(url string) string {
+15 -2
internal/extract/schema/jsonld.go
··· 137 137 return recipe 138 138 } 139 139 140 + func isRecipeType(typ interface{}) bool { 141 + switch v := typ.(type) { 142 + case string: 143 + return v == "Recipe" 144 + case []interface{}: 145 + for _, item := range v { 146 + if s, ok := item.(string); ok && s == "Recipe" { 147 + return true 148 + } 149 + } 150 + } 151 + return false 152 + } 153 + 140 154 func findRecipeObject(v interface{}) interface{} { 141 155 switch val := v.(type) { 142 156 case map[string]interface{}: 143 157 if typ, ok := val["@type"]; ok { 144 - typeStr := fmt.Sprintf("%v", typ) 145 - if typeStr == "Recipe" { 158 + if isRecipeType(typ) { 146 159 return val 147 160 } 148 161 }