this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Fix EOF error for some files without final newline (#27)

If a file was an exact multiple of 1024 bytes (the size of an internal
buffer) and was missing a final newline, the LineReaderAt implementation
would drop the last line, leading to an unexpected EOF error on apply.

In addition to fixing the bug, slightly change the behavior of
ReadLineAt to reflect how it is actually used:

1. Clarify that the return value n includes all lines instead of only
lines with a final newline. This was already true except in the
case of the bug fixed by this commit.

2. Only return io.EOF if fewer lines are read than requested. The
previous implementation also returned io.EOF if the last line was
missing a final newline, but this was confusing and didn't really
serve a purpose.

This is technically a breaking change for external implementations but
an implementation that exactly followed the "spec" was already broken in
certain edge cases.

authored by

Billy Keyes and committed by
GitHub
53bcdf7e b5756546

+77 -28
+1 -4
gitdiff/apply.go
··· 231 231 232 232 preimage := make([][]byte, fragEnd-start) 233 233 n, err := a.lineSrc.ReadLinesAt(preimage, start) 234 - switch { 235 - case err == nil: 236 - case err == io.EOF && n == len(preimage): // last line of frag has no newline character 237 - default: 234 + if err != nil { 238 235 return applyError(err, lineNum(start+int64(n))) 239 236 } 240 237
+22 -22
gitdiff/io.go
··· 5 5 "io" 6 6 ) 7 7 8 + const ( 9 + byteBufferSize = 32 * 1024 // from io.Copy 10 + lineBufferSize = 32 11 + indexBufferSize = 1024 12 + ) 13 + 8 14 // LineReaderAt is the interface that wraps the ReadLinesAt method. 9 15 // 10 - // ReadLinesAt reads len(lines) into lines starting at line offset in the 11 - // input source. It returns number of full lines read (0 <= n <= len(lines)) 12 - // and any error encountered. Line numbers are zero-indexed. 16 + // ReadLinesAt reads len(lines) into lines starting at line offset. It returns 17 + // the number of lines read (0 <= n <= len(lines)) and any error encountered. 18 + // Line numbers are zero-indexed. 13 19 // 14 20 // If n < len(lines), ReadLinesAt returns a non-nil error explaining why more 15 21 // lines were not returned. 16 22 // 17 - // Each full line includes the line ending character(s). If the last line of 18 - // the input does not have a line ending character, ReadLinesAt returns the 19 - // content of the line and io.EOF. 20 - // 21 - // If the content of the input source changes after the first call to 22 - // ReadLinesAt, the behavior of future calls is undefined. 23 + // Lines read by ReadLinesAt include the newline character. The last line does 24 + // not have a final newline character if the input ends without one. 23 25 type LineReaderAt interface { 24 26 ReadLinesAt(lines [][]byte, offset int64) (n int, err error) 25 27 } ··· 65 67 lines[n] = buf[start:end] 66 68 } 67 69 68 - if n < count || buf[len(buf)-1] != '\n' { 70 + if n < count { 69 71 return n, io.EOF 70 72 } 71 73 return n, nil ··· 75 77 // for line or a read returns io.EOF. It returns an error if and only if there 76 78 // is an error reading data. 77 79 func (r *lineReaderAt) indexTo(line int64) error { 78 - var buf [1024]byte 79 - 80 - var offset int64 81 - if len(r.index) > 0 { 82 - offset = r.index[len(r.index)-1] 83 - } 80 + var buf [indexBufferSize]byte 84 81 82 + offset := r.lastOffset() 85 83 for int64(len(r.index)) < line { 86 84 n, err := r.r.ReadAt(buf[:], offset) 87 85 if err != nil && err != io.EOF { ··· 94 92 } 95 93 } 96 94 if err == io.EOF { 97 - if n > 0 && buf[n-1] != '\n' { 95 + if offset > r.lastOffset() { 98 96 r.index = append(r.index, offset) 99 97 } 100 98 r.eof = true ··· 102 100 } 103 101 } 104 102 return nil 103 + } 104 + 105 + func (r *lineReaderAt) lastOffset() int64 { 106 + if n := len(r.index); n > 0 { 107 + return r.index[n-1] 108 + } 109 + return 0 105 110 } 106 111 107 112 // readBytes reads the bytes of the n lines starting at line and returns the ··· 146 151 } 147 152 return false, err 148 153 } 149 - 150 - const ( 151 - byteBufferSize = 32 * 1024 // from io.Copy 152 - lineBufferSize = 32 153 - ) 154 154 155 155 // copyFrom writes bytes starting from offset off in src to dst stopping at the 156 156 // end of src or at the first error. copyFrom returns the number of bytes
+54 -2
gitdiff/io_test.go
··· 9 9 ) 10 10 11 11 func TestLineReaderAt(t *testing.T) { 12 + const lineTemplate = "generated test line %d\n" 13 + 12 14 tests := map[string]struct { 13 15 InputLines int 14 16 Offset int64 ··· 41 43 InputLines: 4, 42 44 Offset: 2, 43 45 Count: 0, 46 + }, 47 + "readAllLines": { 48 + InputLines: 64, 49 + Offset: 0, 50 + Count: 64, 44 51 }, 45 52 "readThroughEOF": { 46 53 InputLines: 16, ··· 71 78 }, 72 79 } 73 80 74 - const lineTemplate = "generated test line %d\n" 75 - 76 81 for name, test := range tests { 77 82 t.Run(name, func(t *testing.T) { 78 83 var input bytes.Buffer ··· 110 115 for i := 0; i < n; i++ { 111 116 if !bytes.Equal(output[i], lines[i]) { 112 117 t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], lines[i]) 118 + } 119 + } 120 + }) 121 + } 122 + 123 + newlineTests := map[string]struct { 124 + InputSize int 125 + }{ 126 + "readLinesNoFinalNewline": { 127 + InputSize: indexBufferSize + indexBufferSize/2, 128 + }, 129 + "readLinesNoFinalNewlineBufferMultiple": { 130 + InputSize: 4 * indexBufferSize, 131 + }, 132 + } 133 + 134 + for name, test := range newlineTests { 135 + t.Run(name, func(t *testing.T) { 136 + input := bytes.Repeat([]byte("0"), test.InputSize) 137 + 138 + var output [][]byte 139 + for i := 0; i < len(input); i++ { 140 + last := i 141 + i += rand.Intn(80) 142 + if i < len(input)-1 { // last character of input must not be a newline 143 + input[i] = '\n' 144 + output = append(output, input[last:i+1]) 145 + } else { 146 + output = append(output, input[last:]) 147 + } 148 + } 149 + 150 + r := &lineReaderAt{r: bytes.NewReader(input)} 151 + lines := make([][]byte, len(output)) 152 + 153 + n, err := r.ReadLinesAt(lines, 0) 154 + if err != nil { 155 + t.Fatalf("unexpected error reading reading lines: %v", err) 156 + } 157 + 158 + if n != len(output) { 159 + t.Fatalf("incorrect number of lines read: expected %d, actual %d", len(output), n) 160 + } 161 + 162 + for i, line := range lines { 163 + if !bytes.Equal(output[i], line) { 164 + t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], line) 113 165 } 114 166 } 115 167 })