this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Fix binary chunk decoding and inflating

Git uses a unique Base85 encoding with different characters than the
ascii85 encoding implemented by Go, so add a custom decoding function.
Once decoded, use zlib instead of the raw DEFLATE algorithm to
decompress the data.

These issues were caught by some basic parsing tests which are added
here as well.

+135 -22
+56
gitdiff/base85.go
// Package gitdiff, file base85.go: the declarations below rely on the
// standard library "fmt" package being imported.

// base85Alphabet lists the 85 characters of Git's Base85 encoding in value
// order: the character at index i encodes the digit i. It is not the same
// alphabet as the one used by Go's encoding/ascii85.
const (
	base85Alphabet = "0123456789" +
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
		"abcdefghijklmnopqrstuvwxyz" +
		"!#$%&()*+-;<=>?@^_`{|}~"
)

var (
	// de85 maps each character of base85Alphabet to its digit value (0-84).
	// It is populated by init.
	de85 map[byte]byte
)

// init builds the de85 reverse lookup table from base85Alphabet.
func init() {
	de85 = make(map[byte]byte, len(base85Alphabet))
	for i, c := range base85Alphabet {
		// The alphabet is pure ASCII, so every rune fits in a single byte.
		de85[byte(c)] = byte(i)
	}
}

// base85Decode decodes Base85-encoded data from src into dst. It uses the
// alphabet defined by base85.c in the Git source tree, which appears to be
// unique. src must contain at least len(dst) bytes of encoded data.
//
// Every 5 characters of src encode one big-endian 32-bit word. Writing stops
// once dst is full, so padding bytes in the final group (and any further
// complete groups) are dropped. An error is returned if src contains a byte
// outside the alphabet, if a group encodes a value that does not fit in 32
// bits, if len(src) is not a multiple of 5, or if src holds fewer than
// len(dst) bytes of decoded data.
func base85Decode(dst, src []byte) error {
	const maxWord = 1<<32 - 1

	// Accumulate in 64 bits: five base85 digits can exceed the 32-bit range,
	// and a wider accumulator lets that be detected instead of wrapping.
	var v uint64
	var n, ndst int
	for i, b := range src {
		d, ok := de85[b]
		if !ok {
			// Report the offending input byte itself, not the lookup result.
			return fmt.Errorf("invalid base85 byte at index %d: 0x%x", i, b)
		}
		v = 85*v + uint64(d)
		n++
		if n < 5 {
			continue
		}

		if v > maxWord {
			return fmt.Errorf("invalid base85 sequence at index %d: value overflows 32 bits", i-4)
		}

		// Emit the word most-significant byte first, stopping when dst is full.
		word := uint32(v)
		for j := 0; j < 4 && ndst < len(dst); j++ {
			dst[ndst] = byte(word >> 24)
			ndst++
			word <<= 8
		}
		v = 0
		n = 0
	}
	if n > 0 {
		return fmt.Errorf("base85 data terminated by underpadded sequence")
	}
	if ndst < len(dst) {
		return fmt.Errorf("base85 data is too short: %d < %d", ndst, len(dst))
	}
	return nil
}
+18 -22
gitdiff/parser.go
··· 3 3 import ( 4 4 "bufio" 5 5 "bytes" 6 - "compress/flate" 7 - "encoding/ascii85" 6 + "compress/zlib" 8 7 "fmt" 9 8 "io" 10 9 "io/ioutil" ··· 423 422 if line == "\n" { 424 423 break 425 424 } 426 - 427 425 if len(line) < len(shortestValidLine) || (len(line)-2)%5 != 0 { 428 426 return p.Errorf(0, "binary patch: corrupt data line") 429 427 } 430 428 431 - byteCount := int(line[0]) 429 + byteCount, seq := int(line[0]), line[1:len(line)-1] 432 430 switch { 433 431 case 'A' <= byteCount && byteCount <= 'Z': 434 432 byteCount = byteCount - 'A' + 1 435 433 case 'a' <= byteCount && byteCount <= 'z': 436 434 byteCount = byteCount - 'a' + 27 437 435 default: 438 - return p.Errorf(0, "binary patch: invalid length byte: %q", line[0]) 436 + return p.Errorf(0, "binary patch: invalid length byte") 439 437 } 440 438 441 439 // base85 encodes every 4 bytes into 5 characters, with up to 3 bytes of end padding 442 - maxByteCount := (len(line) - 2) / 5 * 4 443 - if byteCount >= maxByteCount || byteCount < maxByteCount-3 { 444 - return p.Errorf(0, "binary patch: incorrect byte count: %d", byteCount) 440 + maxByteCount := len(seq) / 5 * 4 441 + if byteCount > maxByteCount || byteCount < maxByteCount-3 { 442 + return p.Errorf(0, "binary patch: incorrect byte count") 445 443 } 446 444 447 - ndst, _, err := ascii85.Decode(buf, []byte(line[1:]), byteCount < maxBytesPerLine) 448 - if err != nil { 445 + if err := base85Decode(buf[:byteCount], []byte(seq)); err != nil { 449 446 return p.Errorf(0, "binary patch: %v", err) 450 447 } 451 - if ndst != byteCount { 452 - return p.Errorf(0, "binary patch: %d byte line decoded as %d", byteCount, ndst) 453 - } 454 - data.Write(buf[:ndst]) 448 + data.Write(buf[:byteCount]) 455 449 456 450 if err := p.Next(); err != nil { 457 451 if err == io.EOF { ··· 472 466 return nil 473 467 } 474 468 475 - func inflateBinaryChunk(frag *BinaryFragment, r io.Reader) (err error) { 476 - inflater := flate.NewReader(r) 477 - defer func() { 478 - if cerr := 
inflater.Close(); cerr != nil && err == nil { 479 - err = cerr 480 - } 481 - }() 469 + func inflateBinaryChunk(frag *BinaryFragment, r io.Reader) error { 470 + zr, err := zlib.NewReader(r) 471 + if err != nil { 472 + return err 473 + } 482 474 483 - data, err := ioutil.ReadAll(inflater) 475 + data, err := ioutil.ReadAll(zr) 484 476 if err != nil { 485 477 return err 486 478 } 479 + if err := zr.Close(); err != nil { 480 + return err 481 + } 482 + 487 483 if int64(len(data)) != frag.Size { 488 484 return fmt.Errorf("%d byte fragment inflated to %d", frag.Size, len(data)) 489 485 }
+61
gitdiff/parser_binary_test.go
··· 1 1 package gitdiff 2 2 3 3 import ( 4 + "encoding/binary" 4 5 "io" 5 6 "reflect" 6 7 "testing" ··· 108 109 }) 109 110 } 110 111 } 112 + 113 + func TestParseBinaryChunk(t *testing.T) { 114 + tests := map[string]struct { 115 + Input string 116 + Fragment BinaryFragment 117 + Output []byte 118 + Err bool 119 + }{ 120 + "newFile": { 121 + Input: "gcmZQzU|?i`U?w2V48*KJ%mKu_Kr9NxN<eH500b)lkN^Mx\n\n", 122 + Fragment: BinaryFragment{ 123 + Size: 40, 124 + }, 125 + Output: fib(10), 126 + }, 127 + "newFileMultiline": { 128 + Input: "zcmZQzU|?i`U?w2V48*KJ%mKu_Kr9NxN<eH5#F0Qe0f=7$l~*z_FeL$%-)3N7vt?l5\n" + 129 + "zl3-vE2xVZ9%4J~CI>f->s?WfX|B-=Vs{#X~svra7Ekg#T|4s}nH;WnAZ)|1Y*`&cB\n" + 130 + "s(sh?X(Uz6L^!Ou&aF*u`J!eibJifSrv0z>$Q%Hd(^HIJ<Y?5`S0gT5UE&u=k\n\n", 131 + Fragment: BinaryFragment{ 132 + Size: 160, 133 + }, 134 + Output: fib(40), 135 + }, 136 + } 137 + 138 + for name, test := range tests { 139 + t.Run(name, func(t *testing.T) { 140 + p := newTestParser(test.Input, true) 141 + 142 + frag := test.Fragment 143 + err := p.ParseBinaryChunk(&frag) 144 + if test.Err { 145 + if err == nil || err == io.EOF { 146 + t.Fatalf("expected error parsing binary chunk, but got %v", err) 147 + } 148 + return 149 + } 150 + if err != nil { 151 + t.Fatalf("unexpected error parsing binary chunk: %v", err) 152 + } 153 + if !reflect.DeepEqual(test.Output, frag.Data) { 154 + t.Errorf("incorrect binary chunk\nexpected: %+v\n actual: %+v", test.Output, frag.Data) 155 + } 156 + }) 157 + } 158 + } 159 + 160 + func fib(n int) []byte { 161 + seq := []uint32{1, 1} 162 + for i := 2; i < n; i++ { 163 + seq = append(seq, seq[i-1]+seq[i-2]) 164 + } 165 + 166 + buf := make([]byte, 4*n) 167 + for i, v := range seq[:n] { 168 + binary.BigEndian.PutUint32(buf[i*4:], v) 169 + } 170 + return buf 171 + }