Fix parsing for file names with spaces (#24) · oppi.li/go-gitdiff@3772c9e

+1

README.md

··· 75 75 76 76 - Numbers immediately followed by non-numeric characters 77 77 - Trailing characters on a line after valid or expected content 78 + - Malformed file header lines (lines that start with `diff --git`) 78 79 79 80 2. Errors for invalid input are generally more verbose and specific than those 80 81 from `git apply`.

+99 -23

gitdiff/file_header.go

··· 172 172 // If the names in the header do not match because the patch is a rename, 173 173 // return an empty default name. 174 174 func parseGitHeaderName(header string) (string, error) { 175 - firstName, n, err := parseName(header, -1, 1) 176 - if err != nil { 177 - return "", err 175 + header = strings.TrimSuffix(header, "\n") 176 + if len(header) == 0 { 177 + return "", nil 178 178 } 179 179 180 - if n < len(header) && (header[n] == ' ' || header[n] == '\t') { 181 - n++ 182 - } 180 + var err error 181 + var first, second string 182 + 183 + // there are 4 cases to account for: 184 + // 185 + // 1) unquoted unquoted 186 + // 2) unquoted "quoted" 187 + // 3) "quoted" unquoted 188 + // 4) "quoted" "quoted" 189 + // 190 + quote := strings.IndexByte(header, '"') 191 + switch { 192 + case quote < 0: 193 + // case 1 194 + first = header 195 + 196 + case quote > 0: 197 + // case 2 198 + first = header[:quote-1] 199 + if !isSpace(header[quote-1]) { 200 + return "", fmt.Errorf("missing separator") 201 + } 202 + 203 + second, _, err = parseQuotedName(header[quote:]) 204 + if err != nil { 205 + return "", err 206 + } 183 207 184 - secondName, _, err := parseName(header[n:], -1, 1) 185 - if err != nil { 186 - return "", err 208 + case quote == 0: 209 + // case 3 or case 4 210 + var n int 211 + first, n, err = parseQuotedName(header) 212 + if err != nil { 213 + return "", err 214 + } 215 + 216 + // git accepts multiple spaces after a quoted name, but not after an 217 + // unquoted name, since the name might end with one or more spaces 218 + for n < len(header) && isSpace(header[n]) { 219 + n++ 220 + } 221 + if n == len(header) { 222 + return "", nil 223 + } 224 + 225 + if header[n] == '"' { 226 + second, _, err = parseQuotedName(header[n:]) 227 + if err != nil { 228 + return "", err 229 + } 230 + } else { 231 + second = header[n:] 232 + } 187 233 } 188 234 189 - if firstName != secondName { 235 + first = trimTreePrefix(first, 1) 236 + if second != "" { 237 + if first == trimTreePrefix(second, 1) { 238 + return first, nil 239 + } 190 240 return "", nil 191 241 } 192 - return firstName, nil 242 + 243 + // at this point, both names are unquoted (case 1) 244 + // since names may contain spaces, we can't use a known separator 245 + // instead, look for a split that produces two equal names 246 + 247 + for i := 0; i < len(first)-1; i++ { 248 + if !isSpace(first[i]) { 249 + continue 250 + } 251 + second = trimTreePrefix(first[i+1:], 1) 252 + if name := first[:i]; name == second { 253 + return name, nil 254 + } 255 + } 256 + return "", nil 193 257 } 194 258 195 259 // parseGitHeaderData parses a single line of metadata from a Git file header. ··· 283 347 284 348 func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) { 285 349 f.IsCopy = true 286 - f.OldName, _, err = parseName(line, -1, 0) 350 + f.OldName, _, err = parseName(line, 0, 0) 287 351 return 288 352 } 289 353 290 354 func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) { 291 355 f.IsCopy = true 292 - f.NewName, _, err = parseName(line, -1, 0) 356 + f.NewName, _, err = parseName(line, 0, 0) 293 357 return 294 358 } 295 359 296 360 func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) { 297 361 f.IsRename = true 298 - f.OldName, _, err = parseName(line, -1, 0) 362 + f.OldName, _, err = parseName(line, 0, 0) 299 363 return 300 364 } 301 365 302 366 func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) { 303 367 f.IsRename = true 304 - f.NewName, _, err = parseName(line, -1, 0) 368 + f.NewName, _, err = parseName(line, 0, 0) 305 369 return 306 370 } 307 371 ··· 349 413 350 414 // parseName extracts a file name from the start of a string and returns the 351 415 // name and the index of the first character after the name. If the name is 352 - // unquoted and term is non-negative, parsing stops at the first occurrence of 353 - // term. Otherwise parsing of unquoted names stops at the first space or tab. 416 + // unquoted and term is non-zero, parsing stops at the first occurrence of 417 + // term. 354 418 // 355 419 // If the name is exactly "/dev/null", no further processing occurs. Otherwise, 356 420 // if dropPrefix is greater than zero, that number of prefix components 357 421 // separated by forward slashes are dropped from the name and any duplicate 358 422 // slashes are collapsed. 359 - func parseName(s string, term rune, dropPrefix int) (name string, n int, err error) { 423 + func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) { 360 424 if len(s) > 0 && s[0] == '"' { 361 425 name, n, err = parseQuotedName(s) 362 426 } else { ··· 387 451 return name, n, err 388 452 } 389 453 390 - func parseUnquotedName(s string, term rune) (name string, n int, err error) { 454 + func parseUnquotedName(s string, term byte) (name string, n int, err error) { 391 455 for n = 0; n < len(s); n++ { 392 456 if s[n] == '\n' { 393 457 break 394 458 } 395 - if term >= 0 && rune(s[n]) == term { 396 - break 397 - } 398 - if term < 0 && (s[n] == ' ' || s[n] == '\t') { 459 + if term > 0 && s[n] == term { 399 460 break 400 461 } 401 462 } ··· 440 501 return b.String() 441 502 } 442 503 504 + // trimTreePrefix removes up to n leading directory components from name. 505 + func trimTreePrefix(name string, n int) string { 506 + i := 0 507 + for ; i < len(name) && n > 0; i++ { 508 + if name[i] == '/' { 509 + n-- 510 + } 511 + } 512 + return name[i:] 513 + } 514 + 443 515 // hasEpochTimestamp returns true if the string ends with a POSIX-formatted 444 516 // timestamp for the UNIX epoch after a tab character. According to git, this 445 517 // is used by GNU diff to mark creations and deletions. ··· 468 540 } 469 541 return true 470 542 } 543 + 544 + func isSpace(c byte) bool { 545 + return c == ' ' || c == '\t' || c == '\n' 546 + }

+35 -8

gitdiff/file_header_test.go

··· 310 310 func TestParseName(t *testing.T) { 311 311 tests := map[string]struct { 312 312 Input string 313 - Term rune 313 + Term byte 314 314 Drop int 315 315 Output string 316 316 N int ··· 334 334 "dropPrefix": { 335 335 Input: "a/dir/file.txt", Drop: 1, Output: "dir/file.txt", N: 14, 336 336 }, 337 - "multipleNames": { 338 - Input: "dir/a.txt dir/b.txt", Term: -1, Output: "dir/a.txt", N: 9, 337 + "unquotedWithSpaces": { 338 + Input: "dir/with spaces.txt", Output: "dir/with spaces.txt", N: 19, 339 + }, 340 + "unquotedWithTrailingSpaces": { 341 + Input: "dir/with spaces.space ", Output: "dir/with spaces.space ", N: 23, 339 342 }, 340 343 "devNull": { 341 344 Input: "/dev/null", Term: '\t', Drop: 1, Output: "/dev/null", N: 9, 342 345 }, 343 - "newlineAlwaysSeparates": { 344 - Input: "dir/file.txt\n", Term: 0, Output: "dir/file.txt", N: 12, 346 + "newlineSeparates": { 347 + Input: "dir/file.txt\n", Output: "dir/file.txt", N: 12, 345 348 }, 346 349 "emptyString": { 347 350 Input: "", Err: true, ··· 630 633 Input: "a/dir/foo.txt b/dir/bar.txt", 631 634 Output: "", 632 635 }, 633 - "missingSecondName": { 634 - Input: "a/dir/foo.txt", 635 - Err: true, 636 + "matchingNamesWithSpaces": { 637 + Input: "a/dir/file with spaces.txt b/dir/file with spaces.txt", 638 + Output: "dir/file with spaces.txt", 639 + }, 640 + "matchingNamesWithTrailingSpaces": { 641 + Input: "a/dir/spaces b/dir/spaces ", 642 + Output: "dir/spaces ", 643 + }, 644 + "matchingNamesQuoted": { 645 + Input: `"a/dir/\"quotes\".txt" "b/dir/\"quotes\".txt"`, 646 + Output: `dir/"quotes".txt`, 647 + }, 648 + "matchingNamesFirstQuoted": { 649 + Input: `"a/dir/file.txt" b/dir/file.txt`, 650 + Output: "dir/file.txt", 651 + }, 652 + "matchingNamesSecondQuoted": { 653 + Input: `a/dir/file.txt "b/dir/file.txt"`, 654 + Output: "dir/file.txt", 655 + }, 656 + "noSecondName": { 657 + Input: "a/dir/foo.txt", 658 + Output: "", 659 + }, 660 + "noSecondNameQuoted": { 661 + Input: `"a/dir/foo.txt"`, 662 + Output: "", 636 663 }, 637 664 "invalidName": { 638 665 Input: `"a/dir/file.txt b/dir/file.txt`,

Configure Feed

Configure Feed