this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Fix parsing for file names with spaces (#24)

Git does not quote file names containing spaces when generating header
lines. However, I misread the Git implementation and thought it used
spaces as a name terminator by default, implying that names with spaces
would be quoted.

Rewrite the the name parsing code to better match Git. Specifically,
when both names in a header are not quoted, test all splits to see if
any of them produce two equal names. Also account for spaces in file
names when parsing file metadata.

Finally, allow trailing spaces on file names, which seem to be allowed
by Git (although this sounds like a terrible idea to me.)

authored by

Billy Keyes and committed by
GitHub
3772c9eb b6666095

+135 -31
+1
README.md
··· 75 75 76 76 - Numbers immediately followed by non-numeric characters 77 77 - Trailing characters on a line after valid or expected content 78 + - Malformed file header lines (lines that start with `diff --git`) 78 79 79 80 2. Errors for invalid input are generally more verbose and specific than those 80 81 from `git apply`.
+99 -23
gitdiff/file_header.go
··· 172 172 // If the names in the header do not match because the patch is a rename, 173 173 // return an empty default name. 174 174 func parseGitHeaderName(header string) (string, error) { 175 - firstName, n, err := parseName(header, -1, 1) 176 - if err != nil { 177 - return "", err 175 + header = strings.TrimSuffix(header, "\n") 176 + if len(header) == 0 { 177 + return "", nil 178 178 } 179 179 180 - if n < len(header) && (header[n] == ' ' || header[n] == '\t') { 181 - n++ 182 - } 180 + var err error 181 + var first, second string 182 + 183 + // there are 4 cases to account for: 184 + // 185 + // 1) unquoted unquoted 186 + // 2) unquoted "quoted" 187 + // 3) "quoted" unquoted 188 + // 4) "quoted" "quoted" 189 + // 190 + quote := strings.IndexByte(header, '"') 191 + switch { 192 + case quote < 0: 193 + // case 1 194 + first = header 195 + 196 + case quote > 0: 197 + // case 2 198 + first = header[:quote-1] 199 + if !isSpace(header[quote-1]) { 200 + return "", fmt.Errorf("missing separator") 201 + } 202 + 203 + second, _, err = parseQuotedName(header[quote:]) 204 + if err != nil { 205 + return "", err 206 + } 183 207 184 - secondName, _, err := parseName(header[n:], -1, 1) 185 - if err != nil { 186 - return "", err 208 + case quote == 0: 209 + // case 3 or case 4 210 + var n int 211 + first, n, err = parseQuotedName(header) 212 + if err != nil { 213 + return "", err 214 + } 215 + 216 + // git accepts multiple spaces after a quoted name, but not after an 217 + // unquoted name, since the name might end with one or more spaces 218 + for n < len(header) && isSpace(header[n]) { 219 + n++ 220 + } 221 + if n == len(header) { 222 + return "", nil 223 + } 224 + 225 + if header[n] == '"' { 226 + second, _, err = parseQuotedName(header[n:]) 227 + if err != nil { 228 + return "", err 229 + } 230 + } else { 231 + second = header[n:] 232 + } 187 233 } 188 234 189 - if firstName != secondName { 235 + first = trimTreePrefix(first, 1) 236 + if second != "" { 237 + if first == trimTreePrefix(second, 1) { 238 + return first, nil 239 + } 190 240 return "", nil 191 241 } 192 - return firstName, nil 242 + 243 + // at this point, both names are unquoted (case 1) 244 + // since names may contain spaces, we can't use a known separator 245 + // instead, look for a split that produces two equal names 246 + 247 + for i := 0; i < len(first)-1; i++ { 248 + if !isSpace(first[i]) { 249 + continue 250 + } 251 + second = trimTreePrefix(first[i+1:], 1) 252 + if name := first[:i]; name == second { 253 + return name, nil 254 + } 255 + } 256 + return "", nil 193 257 } 194 258 195 259 // parseGitHeaderData parses a single line of metadata from a Git file header. ··· 283 347 284 348 func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) { 285 349 f.IsCopy = true 286 - f.OldName, _, err = parseName(line, -1, 0) 350 + f.OldName, _, err = parseName(line, 0, 0) 287 351 return 288 352 } 289 353 290 354 func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) { 291 355 f.IsCopy = true 292 - f.NewName, _, err = parseName(line, -1, 0) 356 + f.NewName, _, err = parseName(line, 0, 0) 293 357 return 294 358 } 295 359 296 360 func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) { 297 361 f.IsRename = true 298 - f.OldName, _, err = parseName(line, -1, 0) 362 + f.OldName, _, err = parseName(line, 0, 0) 299 363 return 300 364 } 301 365 302 366 func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) { 303 367 f.IsRename = true 304 - f.NewName, _, err = parseName(line, -1, 0) 368 + f.NewName, _, err = parseName(line, 0, 0) 305 369 return 306 370 } 307 371 ··· 349 413 350 414 // parseName extracts a file name from the start of a string and returns the 351 415 // name and the index of the first character after the name. If the name is 352 - // unquoted and term is non-negative, parsing stops at the first occurrence of 353 - // term. Otherwise parsing of unquoted names stops at the first space or tab. 416 + // unquoted and term is non-zero, parsing stops at the first occurrence of 417 + // term. 354 418 // 355 419 // If the name is exactly "/dev/null", no further processing occurs. Otherwise, 356 420 // if dropPrefix is greater than zero, that number of prefix components 357 421 // separated by forward slashes are dropped from the name and any duplicate 358 422 // slashes are collapsed. 359 - func parseName(s string, term rune, dropPrefix int) (name string, n int, err error) { 423 + func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) { 360 424 if len(s) > 0 && s[0] == '"' { 361 425 name, n, err = parseQuotedName(s) 362 426 } else { ··· 387 451 return name, n, err 388 452 } 389 453 390 - func parseUnquotedName(s string, term rune) (name string, n int, err error) { 454 + func parseUnquotedName(s string, term byte) (name string, n int, err error) { 391 455 for n = 0; n < len(s); n++ { 392 456 if s[n] == '\n' { 393 457 break 394 458 } 395 - if term >= 0 && rune(s[n]) == term { 396 - break 397 - } 398 - if term < 0 && (s[n] == ' ' || s[n] == '\t') { 459 + if term > 0 && s[n] == term { 399 460 break 400 461 } 401 462 } ··· 440 501 return b.String() 441 502 } 442 503 504 + // trimTreePrefix removes up to n leading directory components from name. 505 + func trimTreePrefix(name string, n int) string { 506 + i := 0 507 + for ; i < len(name) && n > 0; i++ { 508 + if name[i] == '/' { 509 + n-- 510 + } 511 + } 512 + return name[i:] 513 + } 514 + 443 515 // hasEpochTimestamp returns true if the string ends with a POSIX-formatted 444 516 // timestamp for the UNIX epoch after a tab character. According to git, this 445 517 // is used by GNU diff to mark creations and deletions. ··· 468 540 } 469 541 return true 470 542 } 543 + 544 + func isSpace(c byte) bool { 545 + return c == ' ' || c == '\t' || c == '\n' 546 + }
+35 -8
gitdiff/file_header_test.go
··· 310 310 func TestParseName(t *testing.T) { 311 311 tests := map[string]struct { 312 312 Input string 313 - Term rune 313 + Term byte 314 314 Drop int 315 315 Output string 316 316 N int ··· 334 334 "dropPrefix": { 335 335 Input: "a/dir/file.txt", Drop: 1, Output: "dir/file.txt", N: 14, 336 336 }, 337 - "multipleNames": { 338 - Input: "dir/a.txt dir/b.txt", Term: -1, Output: "dir/a.txt", N: 9, 337 + "unquotedWithSpaces": { 338 + Input: "dir/with spaces.txt", Output: "dir/with spaces.txt", N: 19, 339 + }, 340 + "unquotedWithTrailingSpaces": { 341 + Input: "dir/with spaces.space ", Output: "dir/with spaces.space ", N: 23, 339 342 }, 340 343 "devNull": { 341 344 Input: "/dev/null", Term: '\t', Drop: 1, Output: "/dev/null", N: 9, 342 345 }, 343 - "newlineAlwaysSeparates": { 344 - Input: "dir/file.txt\n", Term: 0, Output: "dir/file.txt", N: 12, 346 + "newlineSeparates": { 347 + Input: "dir/file.txt\n", Output: "dir/file.txt", N: 12, 345 348 }, 346 349 "emptyString": { 347 350 Input: "", Err: true, ··· 630 633 Input: "a/dir/foo.txt b/dir/bar.txt", 631 634 Output: "", 632 635 }, 633 - "missingSecondName": { 634 - Input: "a/dir/foo.txt", 635 - Err: true, 636 + "matchingNamesWithSpaces": { 637 + Input: "a/dir/file with spaces.txt b/dir/file with spaces.txt", 638 + Output: "dir/file with spaces.txt", 639 + }, 640 + "matchingNamesWithTrailingSpaces": { 641 + Input: "a/dir/spaces b/dir/spaces ", 642 + Output: "dir/spaces ", 643 + }, 644 + "matchingNamesQuoted": { 645 + Input: `"a/dir/\"quotes\".txt" "b/dir/\"quotes\".txt"`, 646 + Output: `dir/"quotes".txt`, 647 + }, 648 + "matchingNamesFirstQuoted": { 649 + Input: `"a/dir/file.txt" b/dir/file.txt`, 650 + Output: "dir/file.txt", 651 + }, 652 + "matchingNamesSecondQuoted": { 653 + Input: `a/dir/file.txt "b/dir/file.txt"`, 654 + Output: "dir/file.txt", 655 + }, 656 + "noSecondName": { 657 + Input: "a/dir/foo.txt", 658 + Output: "", 659 + }, 660 + "noSecondNameQuoted": { 661 + Input: `"a/dir/foo.txt"`, 662 + Output: "", 636 663 }, 637 664 "invalidName": { 638 665 Input: `"a/dir/file.txt b/dir/file.txt`,