beebo
1package rss
2
3import (
4 "bytes"
5 "encoding/xml"
6 "fmt"
7 "sort"
8 "strings"
9 "time"
10)
11
12func parseRSS2(data []byte) (*Feed, error) {
13 warnings := false
14 feed := rss2_0Feed{}
15 p := xml.NewDecoder(bytes.NewReader(data))
16 p.CharsetReader = charsetReader
17 p.Strict = false
18 err := p.Decode(&feed)
19 if err != nil {
20 return nil, err
21 }
22 if feed.Channel == nil {
23 return nil, fmt.Errorf("no channel found in %q", string(data))
24 }
25
26 channel := feed.Channel
27
28 out := new(Feed)
29 out.Title = channel.Title
30 out.Language = channel.Language
31 out.Author = channel.Author
32 out.Description = channel.Description
33 out.Categories = channel.Categories.toArray()
34 for _, link := range channel.Link {
35 if link.Rel == "" && link.Type == "" && link.Href == "" && link.Chardata != "" {
36 out.Link = link.Chardata
37 break
38 }
39 }
40 out.Image = channel.Image.Image()
41 if channel.MinsToLive != 0 {
42 sort.Ints(channel.SkipHours)
43 next := time.Now().Add(time.Duration(channel.MinsToLive) * time.Minute)
44 for _, hour := range channel.SkipHours {
45 if hour == next.Hour() {
46 next = next.Add(time.Duration(60-next.Minute()) * time.Minute)
47 }
48 }
49 trying := true
50 for trying {
51 trying = false
52 for _, day := range channel.SkipDays {
53 if strings.Title(day) == next.Weekday().String() {
54 next = next.Add(time.Duration(24-next.Hour()) * time.Hour)
55 trying = true
56 break
57 }
58 }
59 }
60
61 out.Refresh = next
62 }
63
64 if out.Refresh.IsZero() {
65 out.Refresh = time.Now().Add(DefaultRefreshInterval)
66 }
67
68 out.Items = make([]*Item, 0, len(channel.Items))
69 out.ItemMap = make(map[string]struct{})
70
71 // Process items.
72 for _, item := range channel.Items {
73
74 if item.ID == "" {
75 if item.Link == "" {
76 if debug {
77 fmt.Printf("[w] Item %q has no ID or link and will be ignored.\n", item.Title)
78 fmt.Printf("[w] %#v\n", item)
79 }
80 warnings = true
81 continue
82 }
83 item.ID = item.Link
84 }
85
86 // Skip items already known.
87 if _, ok := out.ItemMap[item.ID]; ok {
88 continue
89 }
90
91 next := new(Item)
92 next.Title = item.Title
93 next.Summary = item.Description
94 next.Categories = item.Categories
95 next.Link = item.Link
96 next.Image = item.Image.Image()
97 if item.Date != "" {
98 next.Date, err = parseTime(item.Date)
99 if err == nil {
100 next.DateValid = true
101 }
102 } else if item.PubDate != "" {
103 next.Date, err = parseTime(item.PubDate)
104 if err == nil {
105 next.DateValid = true
106 }
107 }
108 next.ID = item.ID
109 if len(item.Enclosures) > 0 {
110 next.Enclosures = make([]*Enclosure, len(item.Enclosures))
111 for i := range item.Enclosures {
112 next.Enclosures[i] = item.Enclosures[i].Enclosure()
113 }
114 }
115 next.Read = false
116
117 out.Items = append(out.Items, next)
118 out.ItemMap[next.ID] = struct{}{}
119 out.Unread++
120 }
121
122 if warnings && debug {
123 fmt.Printf("[i] Encountered warnings:\n%s\n", data)
124 }
125
126 return out, nil
127}
128
129type rss2_0Feed struct {
130 XMLName xml.Name `xml:"rss"`
131 Channel *rss2_0Channel `xml:"channel"`
132}
133
// rss2_0Category maps a <category> element. Name is filled from the
// element's "text" attribute.
type rss2_0Category struct {
	XMLName xml.Name `xml:"category"`
	Name    string   `xml:"text,attr"`
}

// rss2_0CategorySlice is a list of decoded <category> elements.
type rss2_0CategorySlice []rss2_0Category

// toArray returns the Name of every category, in order. It returns nil when
// the slice is nil or empty.
func (r rss2_0CategorySlice) toArray() []string {
	if len(r) == 0 {
		return nil
	}
	names := make([]string, 0, len(r))
	for _, category := range r {
		names = append(names, category.Name)
	}
	return names
}
152
153type rss2_0Channel struct {
154 XMLName xml.Name `xml:"channel"`
155 Title string `xml:"title"`
156 Language string `xml:"language"`
157 Author string `xml:"author"`
158 Description string `xml:"description"`
159 Link []rss2_0Link `xml:"link"`
160 Image rss2_0Image `xml:"image"`
161 Categories rss2_0CategorySlice `xml:"category"`
162 Items []rss2_0Item `xml:"item"`
163 MinsToLive int `xml:"ttl"`
164 SkipHours []int `xml:"skipHours>hour"`
165 SkipDays []string `xml:"skipDays>day"`
166}
167
// rss2_0Link captures one <link> element. Rel, Href and Type are read from
// attributes of the same names; Chardata holds the element's text content.
type rss2_0Link struct {
	Rel      string `xml:"rel,attr"`
	Href     string `xml:"href,attr"`
	Type     string `xml:"type,attr"`
	Chardata string `xml:",chardata"`
}
174
175type rss2_0Categories []string
176
177type rss2_0Item struct {
178 XMLName xml.Name `xml:"item"`
179 Title string `xml:"title"`
180 Description string `xml:"description"`
181 Categories rss2_0Categories `xml:"category"`
182 Link string `xml:"link"`
183 PubDate string `xml:"pubDate"`
184 Date string `xml:"date"`
185 Image rss2_0Image `xml:"image"`
186 DateValid bool
187 ID string `xml:"guid"`
188 Enclosures []rss2_0Enclosure `xml:"enclosure"`
189}
190
// rss2_0Enclosure is the decoding target for an <enclosure> element; URL,
// Type and Length are all read from attributes.
type rss2_0Enclosure struct {
	XMLName xml.Name `xml:"enclosure"`
	URL     string   `xml:"url,attr"`
	Type    string   `xml:"type,attr"`
	Length  uint     `xml:"length,attr"`
}
197
198func (r *rss2_0Enclosure) Enclosure() *Enclosure {
199 out := new(Enclosure)
200 out.URL = r.URL
201 out.Type = r.Type
202 out.Length = r.Length
203 return out
204}
205
// rss2_0Image is the decoding target for an <image> element. Href is read
// from an attribute; Title, URL, Height and Width come from child elements.
type rss2_0Image struct {
	XMLName xml.Name `xml:"image"`
	Href    string   `xml:"href,attr"`
	Title   string   `xml:"title"`
	URL     string   `xml:"url"`
	Height  int      `xml:"height"`
	Width   int      `xml:"width"`
}
214
215func (i *rss2_0Image) Image() *Image {
216 out := new(Image)
217 out.Title = i.Title
218 out.Href = i.Href
219 out.URL = i.URL
220 out.Height = uint32(i.Height)
221 out.Width = uint32(i.Width)
222 return out
223}