fix: strip HTML from text email and remove email-unsafe tags

+39 -5

email/render.go

··· 4 4 "bytes" 5 5 "embed" 6 6 htmltemplate "html/template" 7 + "regexp" 8 + "strings" 7 9 texttemplate "text/template" 8 10 "time" 9 11 ··· 36 38 type templateFeedItem struct { 37 39 Title string 38 40 Link string 39 - Content string // Original content for text template 41 + Content string // Original content (unused, kept for compatibility) 42 + PlainContent string // HTML-stripped content for text template 40 43 SanitizedContent htmltemplate.HTML // Sanitized HTML for HTML template 41 44 Published time.Time 42 45 } ··· 48 51 Items []templateFeedItem 49 52 } 50 53 54 + // emailUnsafeTags are HTML5 semantic tags not supported by most email clients (Gmail, Outlook, etc.) 55 + var emailUnsafeTags = regexp.MustCompile(`</?(?:article|section|nav|header|footer|aside|main|figure|figcaption|details|summary|mark|time|dialog)(?:\s[^>]*)?>`) 56 + 51 57 // sanitizeHTML sanitizes HTML content, allowing safe tags while stripping styles and unsafe elements 52 58 func sanitizeHTML(html string) string { 53 - return policy.Sanitize(html) 59 + sanitized := policy.Sanitize(html) 60 + // Strip HTML5 semantic tags that email clients don't support 61 + return emailUnsafeTags.ReplaceAllString(sanitized, "") 62 + } 63 + 64 + // htmlTagRegex matches HTML tags for stripping 65 + var htmlTagRegex = regexp.MustCompile(`<[^>]*>`) 66 + 67 + // stripHTML removes all HTML tags and decodes entities for plain text output 68 + func stripHTML(html string) string { 69 + // First sanitize to ensure we're working with clean HTML 70 + sanitized := policy.Sanitize(html) 71 + // Strip all remaining HTML tags 72 + text := htmlTagRegex.ReplaceAllString(sanitized, "") 73 + // Decode common HTML entities 74 + text = strings.ReplaceAll(text, "&amp;", "&") 75 + text = strings.ReplaceAll(text, "&lt;", "<") 76 + text = strings.ReplaceAll(text, "&gt;", ">") 77 + text = strings.ReplaceAll(text, "&quot;", "\"") 78 + text = strings.ReplaceAll(text, "&#39;", "'") 79 + text = strings.ReplaceAll(text, "&nbsp;", " ") 80 + // Collapse multiple 
whitespace/newlines 81 + text = regexp.MustCompile(`\s+`).ReplaceAllString(text, " ") 82 + return strings.TrimSpace(text) 54 83 } 55 84 56 85 var ( ··· 86 115 Title: item.Title, 87 116 Link: item.Link, 88 117 Content: item.Content, 118 + PlainContent: stripHTML(item.Content), 89 119 SanitizedContent: htmltemplate.HTML(sanitizeHTML(item.Content)), // #nosec G203 -- Content is sanitized by bluemonday before conversion 90 120 Published: item.Published, 91 121 } ··· 116 146 ShowWarningBanner: showWarningBanner, 117 147 } 118 148 119 - // Prepare template data for text template (with original content) 149 + // Prepare template data for text template (with plain text content) 120 150 textTmplData := struct { 121 - *DigestData 151 + ConfigName string 152 + TotalItems int 153 + FeedGroups []templateFeedGroup 122 154 Inline bool 123 155 DaysUntilExpiry int 124 156 ShowUrgentBanner bool 125 157 ShowWarningBanner bool 126 158 }{ 127 - DigestData: data, 159 + ConfigName: data.ConfigName, 160 + TotalItems: data.TotalItems, 161 + FeedGroups: sanitizedGroups, 128 162 Inline: inline, 129 163 DaysUntilExpiry: daysUntilExpiry, 130 164 ShowUrgentBanner: showUrgentBanner,

+41

email/render_test.go

··· 93 93 t.Error("Safe HTML content was incorrectly removed") 94 94 } 95 95 } 96 + 97 + func TestRenderDigest_TextOutputNoHTMLTags(t *testing.T) { 98 + // Create test data with HTML content 99 + data := &DigestData{ 100 + ConfigName: "Test Config", 101 + TotalItems: 1, 102 + FeedGroups: []FeedGroup{ 103 + { 104 + FeedName: "Test Feed", 105 + FeedURL: "https://example.com/feed", 106 + Items: []FeedItem{ 107 + { 108 + Title: "Test Article", 109 + Link: "https://example.com/article", 110 + Content: "<article><p>This is a <strong>test</strong> article with <a href='https://example.com'>a link</a>.</p></article>", 111 + Published: time.Now(), 112 + }, 113 + }, 114 + }, 115 + }, 116 + } 117 + 118 + // Render with inline mode enabled 119 + _, textOutput, err := RenderDigest(data, true, 30, false, false) 120 + if err != nil { 121 + t.Fatalf("RenderDigest failed: %v", err) 122 + } 123 + 124 + // Verify text output does NOT contain HTML tags 125 + htmlTags := []string{"<article>", "<p>", "<strong>", "<a href", "</article>", "</p>", "</strong>", "</a>"} 126 + for _, tag := range htmlTags { 127 + if strings.Contains(textOutput, tag) { 128 + t.Errorf("Text output contains HTML tag %q - should be stripped", tag) 129 + } 130 + } 131 + 132 + // Verify the actual text content is present 133 + if !strings.Contains(textOutput, "This is a test article with a link") { 134 + t.Error("Text content was not preserved after HTML stripping") 135 + } 136 + }

+2 -2

email/templates/digest.txt

··· 14 14 {{range .Items}} 15 15 {{.Title}} 16 16 {{.Link}} 17 - {{if and $.Inline .Content}} 17 + {{if and $.Inline .PlainContent}} 18 18 19 - {{.Content}} 19 + {{.PlainContent}} 20 20 {{end}} 21 21 {{end}} 22 22 {{end}}

Configure Feed

Configure Feed