···1414 "sync"
1515 "time"
16161717+ htmlmd "github.com/JohannesKaufmann/html-to-markdown"
1718 imap "github.com/emersion/go-imap/v2"
1819 "github.com/emersion/go-imap/v2/imapclient"
1920 "github.com/emersion/go-message"
···364365 }
365366 }
366367368368+ // Prefer HTML: newsletters and modern emails have rich HTML while the
369369+ // text/plain part is typically a stripped dump with raw redirect URLs.
370370+ // Fall back to plain text for plain-text-only emails (e.g. direct replies).
371371+ if htmlText != "" {
372372+ return htmlToMarkdown(htmlText)
373373+ }
367374 if plainText != "" {
368368- return plainText
375375+ return normalizePlainText(plainText)
369376 }
370370- if htmlText != "" {
371371- return stripHTML(htmlText)
377377+ return "(no body)"
378378+}
379379+380380+// htmlToMarkdown converts an HTML email body to Markdown so glamour can render
381381+// it with proper formatting: bold, italic, links, headings, lists, and image
382382+// placeholders ( → [Image: alt] in the terminal).
383383+func htmlToMarkdown(h string) string {
384384+ // Remove <wbr> tags and join newlines inside href/src attribute values.
385385+ // Newsletter services (Substack, Mailchimp) insert line breaks inside URLs
386386+ // for HTML rendering; html-to-markdown preserves them, breaking link syntax.
387387+ h = regexp.MustCompile(`<wbr\s*/?>|​`).ReplaceAllString(h, "")
388388+ // Collapse whitespace (including newlines) inside href="..." and src="..."
389389+ reAttr := regexp.MustCompile(`(?s)((?:href|src)=")(.*?)(")`)
390390+ h = reAttr.ReplaceAllStringFunc(h, func(m string) string {
391391+ parts := reAttr.FindStringSubmatch(m)
392392+ if len(parts) != 4 {
393393+ return m
394394+ }
395395+ clean := regexp.MustCompile(`\s+`).ReplaceAllString(parts[2], "")
396396+ return parts[1] + clean + parts[3]
397397+ })
398398+399399+ converter := htmlmd.NewConverter("", true, nil)
400400+ result, err := converter.ConvertString(h)
401401+ if err != nil {
402402+ return stripHTMLFallback(h)
403403+ }
404404+ return cleanMarkdown(strings.TrimSpace(result))
405405+}
// Patterns used by cleanMarkdown, compiled once at package scope so that
// rendering an email does not recompile six regular expressions every time.
var (
	// reMDInvisible matches invisible characters used as preheader spacers:
	// U+034F COMBINING GRAPHEME JOINER, U+00AD SOFT HYPHEN, U+200B ZERO WIDTH
	// SPACE, U+200C/U+200D ZWNJ/ZWJ and U+FEFF BOM.
	reMDInvisible = regexp.MustCompile(`[\x{034F}\x{00AD}\x{200B}\x{200C}\x{200D}\x{FEFF}]+`)

	// reMDEmptyImage matches an image with blank alt text, e.g. ![](url).
	// U+00A0 is listed explicitly because Go's \s is ASCII-only.
	reMDEmptyImage = regexp.MustCompile(`!\[[\s\x{00A0}]*\]\([^)]*\)`)

	// reMDEmptyLink matches a link with blank text, e.g. [](url) or [ ](url).
	reMDEmptyLink = regexp.MustCompile(`\[[\s\x{00A0}]*\]\([^)]*\)`)

	// reMDBareURL matches a line consisting solely of an http(s) URL.
	reMDBareURL = regexp.MustCompile(`(?m)^https?://\S+$`)

	// reMDBlankLine matches a line made up only of space, tab, U+00A0 NO-BREAK
	// SPACE, U+202F NARROW NO-BREAK SPACE, U+2003 EM SPACE or U+2009 THIN SPACE.
	reMDBlankLine = regexp.MustCompile(`(?m)^[ \t\x{00A0}\x{202F}\x{2003}\x{2009}]+$`)

	// reMDExcessNewlines matches four or more consecutive newlines.
	reMDExcessNewlines = regexp.MustCompile(`\n{4,}`)
)

// cleanMarkdown post-processes converted Markdown to remove newsletter noise.
// The steps run in this order, and the order matters:
//
//  1. Invisible Unicode spacer characters are deleted.
//  2. Images with blank alt text (typically tracking pixels) are deleted.
//  3. Links with blank text are deleted; these are what remains of an
//     image-only link once step 2 has removed the image.
//  4. Lines containing nothing but an http(s) URL are emptied.
//  5. Lines containing only horizontal whitespace (including no-break
//     spaces, which \s does not match in Go) are emptied.
//  6. Runs of three or more blank lines are collapsed to two.
//
// The result has leading and trailing whitespace trimmed.
func cleanMarkdown(s string) string {
	s = reMDInvisible.ReplaceAllString(s, "")
	s = reMDEmptyImage.ReplaceAllString(s, "")
	s = reMDEmptyLink.ReplaceAllString(s, "")
	s = reMDBareURL.ReplaceAllString(s, "")
	s = reMDBlankLine.ReplaceAllString(s, "")
	// Four newlines in a row are three blank lines; keep at most two.
	s = reMDExcessNewlines.ReplaceAllString(s, "\n\n\n")
	return strings.TrimSpace(s)
}
// normalizePlainText prepares a plain-text email body for Markdown rendering.
//
// Markdown treats a single newline inside a paragraph as a soft break, so the
// line layout of a plain-text email would collapse into run-on text. To keep
// the layout, every newline that separates two non-blank lines is preceded by
// two spaces, which Markdown interprets as a hard line break.
//
// Blank lines are paragraph separators and are left alone: no marker is added
// before or after them. Lines containing only whitespace are treated as blank
// and written out empty, so the output never contains a line consisting solely
// of break-marker spaces.
//
// CRLF and bare CR line endings are converted to LF first, and the result has
// leading and trailing whitespace trimmed.
func normalizePlainText(s string) string {
	s = strings.ReplaceAll(s, "\r\n", "\n")
	s = strings.ReplaceAll(s, "\r", "\n")

	lines := strings.Split(s, "\n")
	last := len(lines) - 1

	var b strings.Builder
	// Worst case: every line gains a two-space marker.
	b.Grow(len(s) + 2*len(lines))

	for i, line := range lines {
		blank := strings.TrimSpace(line) == ""
		if !blank {
			b.WriteString(line)
		}
		if i == last {
			break
		}
		// A hard break only makes sense between two lines of text; next to a
		// blank line the paragraph boundary already separates them.
		if blank || strings.TrimSpace(lines[i+1]) == "" {
			b.WriteByte('\n')
			continue
		}
		b.WriteString("  \n")
	}
	return strings.TrimSpace(b.String())
}
375470376376-// stripHTML removes HTML tags, leaving readable plain text.
377377-func stripHTML(h string) string {
471471+// stripHTMLFallback is the last-resort plain-text extractor when the
472472+// html-to-markdown converter fails.
473473+func stripHTMLFallback(h string) string {
378474 reBlock := regexp.MustCompile(`(?is)<(style|script)[^>]*>.*?</(style|script)>`)
379475 h = reBlock.ReplaceAllString(h, "")
380476 reNewline := regexp.MustCompile(`(?i)</(p|div|br|li|tr|h[1-6]|blockquote)>`)
+7-4
internal/render/markdown.go
···66)
7788// ToANSI renders markdown as ANSI-styled terminal output for the reader view.
99-// theme should be "dark", "light", or "auto".
1010-func ToANSI(markdown, theme string) (string, error) {
99+// theme should be "dark", "light", or "auto". width is the terminal column
1010+// count; pass 0 to use the default (80).
1111+func ToANSI(markdown, theme string, width int) (string, error) {
1112 if theme == "" {
1213 theme = "dark"
1314 }
1515+ if width <= 0 {
1616+ width = 80
1717+ }
1418 r, err := glamour.NewTermRenderer(
1519 glamour.WithStylePath(theme),
1616- glamour.WithWordWrap(100),
2020+ glamour.WithWordWrap(width),
1721 )
1822 if err != nil {
1919- // Fall back to notty (no styling)
2023 return glamour.Render(markdown, "notty")
2124 }
2225 return r.Render(markdown)
+1-1
internal/ui/reader.go
···2020func loadEmailIntoReader(vp *viewport.Model, email *imap.Email, body, theme string, width int) error {
2121 header := renderEmailHeader(email, width)
22222323- rendered, err := render.ToANSI(body, theme)
2323+ rendered, err := render.ToANSI(body, theme, width)
2424 if err != nil {
2525 rendered = body // fall back to raw markdown
2626 }