···12011201// it with proper formatting: bold, italic, links, headings, lists, and image
12021202// placeholders ( → [Image: alt] in the terminal).
12031203func htmlToMarkdown(h string) (string, SpyPixelInfo) {
12041204+ // Detect spy pixels on raw HTML before conversion (size/visibility heuristics).
12051205+ spy := detectSpyPixels(h)
12061206+12041207 // Remove <wbr> tags and join newlines inside href/src attribute values.
12051208 // Newsletter services (Substack, Mailchimp) insert line breaks inside URLs
12061209 // for HTML rendering; html-to-markdown preserves them, breaking link syntax.
···12191222 converter := htmlmd.NewConverter("", true, nil)
12201223 result, err := converter.ConvertString(h)
12211224 if err != nil {
12221222- return stripHTMLFallback(h), SpyPixelInfo{}
12251225+ return stripHTMLFallback(h), spy
12231226 }
12241224- return cleanMarkdown(strings.TrimSpace(result))
12271227+ return cleanMarkdown(strings.TrimSpace(result)), spy
12251228}
// SpyPixelInfo holds the results of tracking pixel detection.
type SpyPixelInfo struct {
	// Count is the number of tracking pixels detected. A pixel is counted even
	// when no domain could be extracted from its URL, so Count may exceed
	// len(Domains).
	Count int
	// Domains lists the unique tracker domains extracted from pixel URLs, in
	// order of first appearance.
	Domains []string
}
1232123612371237+// reSpyPixel matches any <img> tag with a double-quoted http(s) src and captures
12381238+// the URL in group 1. Whether a tag is a tracking pixel is decided by isSpyPixel:
12391239+// empty or missing alt AND at least one of: width/height of 0 or 1, display:none,
12401240+// visibility:hidden, or a known tracker URL pattern (track/open, pixel, beacon).
12411241+// This avoids false positives on legitimate decorative images or image-only buttons.
12421242+var reSpyPixel = regexp.MustCompile(`(?i)<img\b[^>]*\bsrc="(https?://[^"]+)"[^>]*>`)
// isSpyPixel reports whether a single raw <img ...> tag looks like a tracking
// pixel.
//
// A tag qualifies only if its alt attribute is missing, empty, or
// whitespace-only AND at least one of the following holds:
//   - a width or height attribute of exactly 0 or 1 (optionally with "px"),
//   - inline hiding via display:none or visibility:hidden,
//   - an http(s) src URL containing a known open-tracking path fragment.
//
// Requiring both conditions keeps ordinary decorative images and image-only
// buttons from being flagged.
func isSpyPixel(tag string) bool {
	// Real alt text means a real image, whatever its size.
	if reSpyAltText.MatchString(tag) {
		return false
	}
	if reSpyTinyDim.MatchString(tag) || reSpyHiddenCSS.MatchString(tag) {
		return true
	}
	m := reSpyHTTPSrc.FindStringSubmatch(tag)
	if m == nil {
		return false
	}
	src := strings.ToLower(m[1])
	for _, p := range spyTrackerPaths {
		if strings.Contains(src, p) {
			return true
		}
	}
	return false
}

// reSpyAltText matches a quoted alt attribute that contains at least one
// non-whitespace character.
var reSpyAltText = regexp.MustCompile(`(?i)\balt\s*=\s*(?:"[^"]*[^"\s][^"]*"|'[^']*[^'\s][^']*')`)

// reSpyTinyDim matches a width/height attribute whose value is exactly 0 or 1
// (optionally suffixed with "px"). The trailing delimiter is required so that
// values such as 100 or 150 are not mistaken for 1.
var reSpyTinyDim = regexp.MustCompile(`(?i)\b(?:width|height)\s*=\s*["']?[01](?:px)?["']?(?:[\s/>]|$)`)

// reSpyHiddenCSS matches inline CSS that hides the element.
var reSpyHiddenCSS = regexp.MustCompile(`(?i)(?:display\s*:\s*none|visibility\s*:\s*hidden)`)

// reSpyHTTPSrc captures an http(s) src URL, single- or double-quoted.
var reSpyHTTPSrc = regexp.MustCompile(`(?i)\bsrc\s*=\s*["'](https?://[^"']+)["']`)

// spyTrackerPaths lists lower-case URL fragments used by common open trackers.
var spyTrackerPaths = []string{
	"/track/open", "/track/click", "open.php",
	"/pixel", "/beacon", "/wf/open", "/o.gif",
	"list-manage.com/track",
}
12771277+12781278+// detectSpyPixels scans raw HTML for tracking pixel <img> tags.
12791279+func detectSpyPixels(html string) SpyPixelInfo {
12801280+ var spy SpyPixelInfo
12811281+ // Find all <img> tags
12821282+ reImg := regexp.MustCompile(`(?i)<img\b[^>]*>`)
12831283+ tags := reImg.FindAllString(html, -1)
12841284+ seen := make(map[string]bool)
12851285+ for _, tag := range tags {
12861286+ if isSpyPixel(tag) {
12871287+ spy.Count++
12881288+ src := reSpyPixel.FindStringSubmatch(tag)
12891289+ if len(src) >= 2 {
12901290+ if d := extractDomain(src[1]); d != "" && !seen[d] {
12911291+ seen[d] = true
12921292+ spy.Domains = append(spy.Domains, d)
12931293+ }
12941294+ }
12951295+ }
12961296+ }
12971297+ return spy
12981298+}
12991299+12331300// reEmptyImg matches markdown image tags with empty or whitespace-only alt text; group 1 captures the URL.
12341301var reEmptyImg = regexp.MustCompile(`!\[\s*\]\(([^)]*)\)`)
1235130212361303// cleanMarkdown post-processes html-to-markdown output to remove newsletter
12371237-// noise: invisible Unicode spacers, tracking pixels, bare URL lines, and
12381238-// excessive blank lines. Returns the cleaned string and spy pixel info.
12391239-func cleanMarkdown(s string) (string, SpyPixelInfo) {
13041304+// noise: invisible Unicode spacers, empty images, bare URL lines, and
13051305+// excessive blank lines.
13061306+func cleanMarkdown(s string) string {
12401307 // 1. Strip invisible Unicode characters used as email preheader spacers:
12411308 // U+034F COMBINING GRAPHEME JOINER, U+00AD SOFT HYPHEN,
12421309 // U+200B ZERO WIDTH SPACE, U+200C/D ZWNJ/ZWJ, U+FEFF BOM
12431310 reInvis := regexp.MustCompile(`[\x{034F}\x{00AD}\x{200B}\x{200C}\x{200D}\x{FEFF}]+`)
12441311 s = reInvis.ReplaceAllString(s, "")
1245131212461246- // 2. Detect and remove empty image tags (tracking pixels):  or 
12471247- var spy SpyPixelInfo
12481248- matches := reEmptyImg.FindAllStringSubmatch(s, -1)
12491249- spy.Count = len(matches)
12501250- if spy.Count > 0 {
12511251- seen := make(map[string]bool)
12521252- for _, m := range matches {
12531253- if d := extractDomain(m[1]); d != "" && !seen[d] {
12541254- seen[d] = true
12551255- spy.Domains = append(spy.Domains, d)
12561256- }
12571257- }
12581258- s = reEmptyImg.ReplaceAllString(s, "")
12591259- }
13131313+ // 2. Remove empty image tags:  or 
13141314+ s = reEmptyImg.ReplaceAllString(s, "")
1260131512611316 // 3. Remove empty link anchors left behind when image-only links are cleaned:
12621317 // [](url) or [ ](url)
···12781333 reExcessBlank := regexp.MustCompile(`\n{4,}`)
12791334 s = reExcessBlank.ReplaceAllString(s, "\n\n\n")
1280133512811281- return strings.TrimSpace(s), spy
13361336+ return strings.TrimSpace(s)
12821337}
1283133812841339// extractDomain pulls the hostname from a URL string, returning "" on failure.
+14-6
internal/imap/client_test.go
···409409410410func TestSpyPixelDetection(t *testing.T) {
411411 // HTML email with 2 tracking pixels from different domains.
412412+ // First: detected by size heuristic (width="1" height="1")
413413+ // Second: detected by URL pattern (/track/open)
414414+ // Third: legitimate image with alt text — should NOT be counted
415415+ // Fourth: decorative image with empty alt but normal size — should NOT be counted
412416 raw := "MIME-Version: 1.0\r\n" +
413417 "Content-Type: text/html; charset=utf-8\r\n" +
414418 "\r\n" +
415419 `<html><body>` +
416420 `<p>Hello world</p>` +
417417- `<img src="https://open.mailchimp.com/track/abc123" alt="" width="1" height="1">` +
418418- `<img src="https://pixel.sendinblue.com/log/open?id=xyz" alt="">` +
421421+ `<img src="https://click.mailchimp.com/track/open.php?id=abc" alt="" width="1" height="1">` +
422422+ `<img src="https://pixel.sendinblue.com/beacon/track/open?id=xyz" alt="" height="0">` +
419423 `<img src="cid:logo" alt="Company Logo">` +
424424+ `<img src="https://cdn.example.com/button.png" alt="" width="200" height="50">` +
420425 `</body></html>`
421426422427 _, _, _, _, _, spy := parseBody([]byte(raw))
423428424424- if spy.Count < 2 {
425425- t.Errorf("SpyPixelInfo.Count = %d, want >= 2", spy.Count)
429429+ if spy.Count != 2 {
430430+ t.Errorf("SpyPixelInfo.Count = %d, want 2", spy.Count)
426431 }
427432 // Check that domains were extracted
428433 found := make(map[string]bool)
429434 for _, d := range spy.Domains {
430435 found[d] = true
431436 }
432432- if !found["open.mailchimp.com"] {
433433- t.Errorf("expected domain open.mailchimp.com in spy.Domains, got %v", spy.Domains)
437437+ if !found["click.mailchimp.com"] {
438438+ t.Errorf("expected domain click.mailchimp.com in spy.Domains, got %v", spy.Domains)
434439 }
435440 if !found["pixel.sendinblue.com"] {
436441 t.Errorf("expected domain pixel.sendinblue.com in spy.Domains, got %v", spy.Domains)
442442+ }
443443+ if found["cdn.example.com"] {
444444+ t.Errorf("decorative image cdn.example.com should NOT be counted as spy pixel, got %v", spy.Domains)
437445 }
438446}
439447
+22
internal/render/html.go
···6161 goldmark.WithRendererOptions(html.WithHardWraps()),
6262)
// cspMeta is the Content-Security-Policy meta tag injected into all browser views.
// Blocks remote images (tracking pixels), scripts, and fonts.
const cspMeta = `<meta http-equiv="Content-Security-Policy" content="default-src 'none'; style-src 'unsafe-inline'; img-src file: data: cid:; font-src 'none';">`

// InjectCSP inserts a CSP meta tag into an HTML document for safe browser viewing.
//
// Placement, in order of preference:
//   - directly after the first opening <head ...> tag, preceded by a newline;
//   - otherwise wrapped in a new <head> element right after the opening
//     <html ...> tag;
//   - otherwise prepended to the document, followed by a newline.
//
// Tag names are matched ASCII case-insensitively on a same-length copy of the
// input, so every offset found is valid in the original string even when it
// contains non-ASCII text or invalid UTF-8.
func InjectCSP(html string) string {
	lower := asciiLowerCopy(html)
	if end := openTagEnd(lower, "head"); end >= 0 {
		return html[:end] + "\n" + cspMeta + html[end:]
	}
	if end := openTagEnd(lower, "html"); end >= 0 {
		return html[:end] + "<head>" + cspMeta + "</head>" + html[end:]
	}
	return cspMeta + "\n" + html
}

// asciiLowerCopy returns s with the bytes 'A'-'Z' lower-cased and every other
// byte untouched. Unlike strings.ToLower, the result always has len(s) bytes.
func asciiLowerCopy(s string) string {
	b := []byte(s)
	for i, c := range b {
		if 'A' <= c && c <= 'Z' {
			b[i] = c + ('a' - 'A')
		}
	}
	return string(b)
}

// openTagEnd returns the offset just past the '>' that closes the first
// opening tag called name in lower (already lower-cased), or -1 if there is
// none. The name must be followed by '>', whitespace, or '/', so "<header>"
// is not mistaken for "<head>".
func openTagEnd(lower, name string) int {
	prefix := "<" + name
	for from := 0; ; {
		rel := strings.Index(lower[from:], prefix)
		if rel < 0 {
			return -1
		}
		after := from + rel + len(prefix)
		if after >= len(lower) {
			return -1
		}
		switch lower[after] {
		case '>':
			return after + 1
		case ' ', '\t', '\n', '\r', '\f', '/':
			if end := strings.IndexByte(lower[after:], '>'); end >= 0 {
				return after + end + 1
			}
			return -1
		}
		// A longer tag name sharing the prefix; keep searching after it.
		from = after
	}
}
8585+6486// ToHTML converts a Markdown string to a complete HTML email document.
6587func ToHTML(markdown string) (string, error) {
6688 var fragment bytes.Buffer