this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Follow git logic when parsing patch identities (#44)

When GitHub creates patches for Dependabot PRs, it generates a "From:"
line that is not valid according to RFC 5322: the address spec contains
unquoted special characters (the "[bot]" in "dependabot[bot]"). While
the 'net/mail' parser makes some exceptions to the spec, this is not one
of them, so parsing these patch headers fails.

Git's 'mailinfo' command avoids this by only implementing the unquoting
part of RFC 5322 and then applying a heuristic to separate the string into
name and email values that seem reasonable.

This commit does two things:

1. Reimplements ParsePatchIdentity to follow Git's logic, so that it can
accept a wider range of inputs, including quoted strings. Strings
accepted by the previous implementation parse in the same way with
one exception: inputs that contain whitespace inside the angle
brackets for an email address now use the email address as the name
and drop any separate name component.

2. When parsing mail-formatted patches, use ParsePatchIdentity to parse
the "From:" line instead of the 'net/mail' function.

authored by

Billy Keyes and committed by
GitHub
a00d2cca 3f2ea5c1

+321 -142
+6 -65
gitdiff/patch_header.go
··· 68 68 return msg.String() 69 69 } 70 70 71 - // PatchIdentity identifies a person who authored or committed a patch. 72 - type PatchIdentity struct { 73 - Name string 74 - Email string 75 - } 76 - 77 - func (i PatchIdentity) String() string { 78 - name := i.Name 79 - if name == "" { 80 - name = `""` 81 - } 82 - return fmt.Sprintf("%s <%s>", name, i.Email) 83 - } 84 - 85 - // ParsePatchIdentity parses a patch identity string. A valid string contains 86 - // an optional name followed by an email address in angle brackets. The angle 87 - // brackets must always exist, but may enclose an empty address. At least one 88 - // of the name or the email address must be non-empty. If the string only 89 - // contains an email address, that value is also used as the name. 90 - // 91 - // The name must not contain a left angle bracket, '<', and the email address 92 - // must not contain a right angle bracket, '>'. Otherwise, there are no 93 - // restrictions on the format of either field. 94 - func ParsePatchIdentity(s string) (PatchIdentity, error) { 95 - var emailStart, emailEnd int 96 - for i, c := range s { 97 - if c == '<' && emailStart == 0 { 98 - emailStart = i + 1 99 - } 100 - if c == '>' && emailStart > 0 { 101 - emailEnd = i 102 - break 103 - } 104 - } 105 - if emailStart > 0 && emailEnd == 0 { 106 - return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s) 107 - } 108 - 109 - var name, email string 110 - if emailStart > 0 { 111 - name = strings.TrimSpace(s[:emailStart-1]) 112 - } 113 - if emailStart > 0 && emailEnd > 0 { 114 - email = strings.TrimSpace(s[emailStart:emailEnd]) 115 - } 116 - if name == "" && email != "" { 117 - name = email 118 - } 119 - 120 - if name == "" && email == "" { 121 - return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s) 122 - } 123 - 124 - return PatchIdentity{Name: name, Email: email}, nil 125 - } 126 - 127 71 // ParsePatchDate parses a patch date string. 
It returns the parsed time or an 128 72 // error if s has an unknown format. ParsePatchDate supports the iso, rfc, 129 73 // short, raw, unix, and default formats (with local variants) used by the ··· 425 369 } 426 370 } 427 371 428 - addrs, err := msg.Header.AddressList("From") 429 - if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) { 430 - return nil, err 431 - } 432 - if len(addrs) > 0 { 433 - addr := addrs[0] 434 - if addr.Name == "" { 435 - addr.Name = addr.Address 372 + from := msg.Header.Get("From") 373 + if from != "" { 374 + u, err := ParsePatchIdentity(from) 375 + if err != nil { 376 + return nil, err 436 377 } 437 - h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address} 378 + h.Author = &u 438 379 } 439 380 440 381 date := msg.Header.Get("Date")
+22 -77
gitdiff/patch_header_test.go
··· 5 5 "time" 6 6 ) 7 7 8 - func TestParsePatchIdentity(t *testing.T) { 9 - tests := map[string]struct { 10 - Input string 11 - Output PatchIdentity 12 - Err interface{} 13 - }{ 14 - "simple": { 15 - Input: "Morton Haypenny <mhaypenny@example.com>", 16 - Output: PatchIdentity{ 17 - Name: "Morton Haypenny", 18 - Email: "mhaypenny@example.com", 19 - }, 20 - }, 21 - "extraWhitespace": { 22 - Input: " Morton Haypenny <mhaypenny@example.com > ", 23 - Output: PatchIdentity{ 24 - Name: "Morton Haypenny", 25 - Email: "mhaypenny@example.com", 26 - }, 27 - }, 28 - "trailingCharacters": { 29 - Input: "Morton Haypenny <mhaypenny@example.com> unrelated garbage", 30 - Output: PatchIdentity{ 31 - Name: "Morton Haypenny", 32 - Email: "mhaypenny@example.com", 33 - }, 34 - }, 35 - "onlyEmail": { 36 - Input: "<mhaypenny@example.com>", 37 - Output: PatchIdentity{ 38 - Name: "mhaypenny@example.com", 39 - Email: "mhaypenny@example.com", 40 - }, 41 - }, 42 - "emptyEmail": { 43 - Input: "Morton Haypenny <>", 44 - Output: PatchIdentity{ 45 - Name: "Morton Haypenny", 46 - Email: "", 47 - }, 48 - }, 49 - "missingEmail": { 50 - Input: "Morton Haypenny", 51 - Err: "invalid identity", 52 - }, 53 - "missingNameAndEmptyEmail": { 54 - Input: "<>", 55 - Err: "invalid identity", 56 - }, 57 - "empty": { 58 - Input: "", 59 - Err: "invalid identity", 60 - }, 61 - "unclosedEmail": { 62 - Input: "Morton Haypenny <mhaypenny@example.com", 63 - Err: "unclosed email", 64 - }, 65 - } 66 - 67 - for name, test := range tests { 68 - t.Run(name, func(t *testing.T) { 69 - id, err := ParsePatchIdentity(test.Input) 70 - if test.Err != nil { 71 - assertError(t, test.Err, err, "parsing identity") 72 - return 73 - } 74 - if err != nil { 75 - t.Fatalf("unexpected error parsing identity: %v", err) 76 - } 77 - 78 - if test.Output != id { 79 - t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id) 80 - } 81 - }) 82 - } 83 - } 84 - 85 8 func TestParsePatchDate(t *testing.T) { 86 9 expected := 
time.Date(2020, 4, 9, 8, 7, 6, 0, time.UTC) 87 10 ··· 346 269 Author: expectedIdentity, 347 270 AuthorDate: expectedDate, 348 271 Title: expectedEmojiMultiLineTitle, 272 + Body: expectedBody, 273 + }, 274 + }, 275 + "mailboxRFC5322SpecialCharacters": { 276 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 277 + From: "dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com> 278 + Date: Sat, 11 Apr 2020 15:21:23 -0700 279 + Subject: [PATCH] A sample commit to test header parsing 280 + 281 + The medium format shows the body, which 282 + may wrap on to multiple lines. 283 + 284 + Another body line. 285 + `, 286 + Header: PatchHeader{ 287 + SHA: expectedSHA, 288 + Author: &PatchIdentity{ 289 + Name: "dependabot[bot]", 290 + Email: "12345+dependabot[bot]@users.noreply.github.com", 291 + }, 292 + AuthorDate: expectedDate, 293 + Title: expectedTitle, 349 294 Body: expectedBody, 350 295 }, 351 296 },
+166
gitdiff/patch_identity.go
··· 1 + package gitdiff 2 + 3 + import ( 4 + "fmt" 5 + "strings" 6 + ) 7 + 8 + // PatchIdentity identifies a person who authored or committed a patch. 9 + type PatchIdentity struct { 10 + Name string 11 + Email string 12 + } 13 + 14 + func (i PatchIdentity) String() string { 15 + name := i.Name 16 + if name == "" { 17 + name = `""` 18 + } 19 + return fmt.Sprintf("%s <%s>", name, i.Email) 20 + } 21 + 22 + // ParsePatchIdentity parses a patch identity string. A patch identity contains 23 + // an email address and an optional name in [RFC 5322] format. This is either a 24 + // plain email address or a name followed by an address in angle brackets: 25 + // 26 + // author@example.com 27 + // Author Name <author@example.com> 28 + // 29 + // If the input is not one of these formats, ParsePatchIdentity applies a 30 + // heuristic to separate the name and email portions. If both the name and 31 + // email are missing or empty, ParsePatchIdentity returns an error. It 32 + // otherwise does not validate the result. 33 + // 34 + // [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322 35 + func ParsePatchIdentity(s string) (PatchIdentity, error) { 36 + s = normalizeSpace(s) 37 + s = unquotePairs(s) 38 + 39 + var name, email string 40 + if at := strings.IndexByte(s, '@'); at >= 0 { 41 + start, end := at, at 42 + for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' { 43 + start-- 44 + } 45 + for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' { 46 + end++ 47 + } 48 + email = s[start+1 : end] 49 + 50 + // Adjust the boundaries so that we drop angle brackets, but keep 51 + // spaces when removing the email to form the name. 
52 + if start < 0 || s[start] != '<' { 53 + start++ 54 + } 55 + if end >= len(s) || s[end] != '>' { 56 + end-- 57 + } 58 + name = s[:start] + s[end+1:] 59 + } else { 60 + start, end := 0, 0 61 + for i := 0; i < len(s); i++ { 62 + if s[i] == '<' && start == 0 { 63 + start = i + 1 64 + } 65 + if s[i] == '>' && start > 0 { 66 + end = i 67 + break 68 + } 69 + } 70 + if start > 0 && end >= start { 71 + email = strings.TrimSpace(s[start:end]) 72 + name = s[:start-1] 73 + } 74 + } 75 + 76 + // After extracting the email, the name might contain extra whitespace 77 + // again and may be surrounded by comment characters. The git source gives 78 + // these examples of when this can happen: 79 + // 80 + // "Name <email@domain>" 81 + // "email@domain (Name)" 82 + // "Name <email@domain> (Comment)" 83 + // 84 + name = normalizeSpace(name) 85 + if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") { 86 + name = name[1 : len(name)-1] 87 + } 88 + name = strings.TrimSpace(name) 89 + 90 + // If the name is empty or contains email-like characters, use the email 91 + // instead (assuming one exists) 92 + if name == "" || strings.ContainsAny(name, "@<>") { 93 + name = email 94 + } 95 + 96 + if name == "" && email == "" { 97 + return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s) 98 + } 99 + return PatchIdentity{Name: name, Email: email}, nil 100 + } 101 + 102 + // unquotePairs processes the RFC5322 tokens "quoted-string" and "comment" to 103 + // remove any "quoted-pairs" (backslash-escaped characters). It also removes 104 + // the quotes from any quoted strings, but leaves the comment delimiters. 
105 + func unquotePairs(s string) string { 106 + quote := false 107 + comments := 0 108 + escaped := false 109 + 110 + var out strings.Builder 111 + for i := 0; i < len(s); i++ { 112 + if escaped { 113 + escaped = false 114 + } else { 115 + switch s[i] { 116 + case '\\': 117 + // quoted-pair is only allowed in quoted-string/comment 118 + if quote || comments > 0 { 119 + escaped = true 120 + continue // drop '\' character 121 + } 122 + 123 + case '"': 124 + if comments == 0 { 125 + quote = !quote 126 + continue // drop '"' character 127 + } 128 + 129 + case '(': 130 + if !quote { 131 + comments++ 132 + } 133 + case ')': 134 + if comments > 0 { 135 + comments-- 136 + } 137 + } 138 + } 139 + out.WriteByte(s[i]) 140 + } 141 + return out.String() 142 + } 143 + 144 + // normalizeSpace trims leading and trailing whitespace from s and converts 145 + // inner sequences of one or more whitespace characters to single spaces. 146 + func normalizeSpace(s string) string { 147 + var sb strings.Builder 148 + for i := 0; i < len(s); i++ { 149 + c := s[i] 150 + if !isRFC5332Space(c) { 151 + if sb.Len() > 0 && isRFC5332Space(s[i-1]) { 152 + sb.WriteByte(' ') 153 + } 154 + sb.WriteByte(c) 155 + } 156 + } 157 + return sb.String() 158 + } 159 + 160 + func isRFC5332Space(c byte) bool { 161 + switch c { 162 + case '\t', '\n', '\r', ' ': 163 + return true 164 + } 165 + return false 166 + }
+127
gitdiff/patch_identity_test.go
··· 1 + package gitdiff 2 + 3 + import ( 4 + "testing" 5 + ) 6 + 7 + func TestParsePatchIdentity(t *testing.T) { 8 + tests := map[string]struct { 9 + Input string 10 + Output PatchIdentity 11 + Err interface{} 12 + }{ 13 + "simple": { 14 + Input: "Morton Haypenny <mhaypenny@example.com>", 15 + Output: PatchIdentity{ 16 + Name: "Morton Haypenny", 17 + Email: "mhaypenny@example.com", 18 + }, 19 + }, 20 + "extraWhitespace": { 21 + Input: "\t Morton Haypenny \r\n<mhaypenny@example.com> ", 22 + Output: PatchIdentity{ 23 + Name: "Morton Haypenny", 24 + Email: "mhaypenny@example.com", 25 + }, 26 + }, 27 + "trailingCharacters": { 28 + Input: "Morton Haypenny <mhaypenny@example.com> II", 29 + Output: PatchIdentity{ 30 + Name: "Morton Haypenny II", 31 + Email: "mhaypenny@example.com", 32 + }, 33 + }, 34 + "onlyEmail": { 35 + Input: "mhaypenny@example.com", 36 + Output: PatchIdentity{ 37 + Name: "mhaypenny@example.com", 38 + Email: "mhaypenny@example.com", 39 + }, 40 + }, 41 + "onlyEmailInBrackets": { 42 + Input: "<mhaypenny@example.com>", 43 + Output: PatchIdentity{ 44 + Name: "mhaypenny@example.com", 45 + Email: "mhaypenny@example.com", 46 + }, 47 + }, 48 + "rfc5322SpecialCharacters": { 49 + Input: `"dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com>`, 50 + Output: PatchIdentity{ 51 + Name: "dependabot[bot]", 52 + Email: "12345+dependabot[bot]@users.noreply.github.com", 53 + }, 54 + }, 55 + "rfc5322QuotedPairs": { 56 + Input: `"Morton \"Old-Timer\" Haypenny" <"mhaypenny\+[1900]"@example.com> (III \(PhD\))`, 57 + Output: PatchIdentity{ 58 + Name: `Morton "Old-Timer" Haypenny (III (PhD))`, 59 + Email: "mhaypenny+[1900]@example.com", 60 + }, 61 + }, 62 + "rfc5322QuotedPairsOutOfContext": { 63 + Input: `Morton \\Backslash Haypenny <mhaypenny@example.com>`, 64 + Output: PatchIdentity{ 65 + Name: `Morton \\Backslash Haypenny`, 66 + Email: "mhaypenny@example.com", 67 + }, 68 + }, 69 + "emptyEmail": { 70 + Input: "Morton Haypenny <>", 71 + Output: PatchIdentity{ 72 + 
Name: "Morton Haypenny", 73 + Email: "", 74 + }, 75 + }, 76 + "unclosedEmail": { 77 + Input: "Morton Haypenny <mhaypenny@example.com", 78 + Output: PatchIdentity{ 79 + Name: "Morton Haypenny", 80 + Email: "mhaypenny@example.com", 81 + }, 82 + }, 83 + "bogusEmail": { 84 + Input: "Morton Haypenny <mhaypenny>", 85 + Output: PatchIdentity{ 86 + Name: "Morton Haypenny", 87 + Email: "mhaypenny", 88 + }, 89 + }, 90 + "bogusEmailWithWhitespace": { 91 + Input: "Morton Haypenny < mhaypenny >", 92 + Output: PatchIdentity{ 93 + Name: "Morton Haypenny", 94 + Email: "mhaypenny", 95 + }, 96 + }, 97 + "missingEmail": { 98 + Input: "Morton Haypenny", 99 + Err: "invalid identity", 100 + }, 101 + "missingNameAndEmptyEmail": { 102 + Input: "<>", 103 + Err: "invalid identity", 104 + }, 105 + "empty": { 106 + Input: "", 107 + Err: "invalid identity", 108 + }, 109 + } 110 + 111 + for name, test := range tests { 112 + t.Run(name, func(t *testing.T) { 113 + id, err := ParsePatchIdentity(test.Input) 114 + if test.Err != nil { 115 + assertError(t, test.Err, err, "parsing identity") 116 + return 117 + } 118 + if err != nil { 119 + t.Fatalf("unexpected error parsing identity: %v", err) 120 + } 121 + 122 + if test.Output != id { 123 + t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id) 124 + } 125 + }) 126 + } 127 + }