diff --git a/filetree/data.go b/filetree/data.go index 0aa8d6c..47fb89d 100644 --- a/filetree/data.go +++ b/filetree/data.go @@ -2,12 +2,12 @@ package filetree import ( "archive/tar" - "bytes" - "crypto/md5" "fmt" + "io" + + "github.com/cespare/xxhash" "github.com/sirupsen/logrus" "github.com/spf13/viper" - "io" ) const ( @@ -34,7 +34,7 @@ type ViewInfo struct { type FileInfo struct { Path string TypeFlag byte - MD5sum [16]byte + hash uint64 TarHeader tar.Header } @@ -74,26 +74,44 @@ func (view *ViewInfo) Copy() (newView *ViewInfo) { return newView } +var chuckSize = 2 * 1024 * 1024 + +func getHashFromReader(reader io.Reader) uint64 { + h := xxhash.New() + + buf := make([]byte, chuckSize) + for { + n, err := reader.Read(buf) + if err != nil && err != io.EOF { + logrus.Panic(err) + } + if n == 0 { + break + } + + h.Write(buf[:n]) + } + + return h.Sum64() +} + // NewFileInfo extracts the metadata from a tar header and file contents and generates a new FileInfo object. func NewFileInfo(reader *tar.Reader, header *tar.Header, path string) FileInfo { if header.Typeflag == tar.TypeDir { return FileInfo{ Path: path, TypeFlag: header.Typeflag, - MD5sum: [16]byte{}, + hash: 0, TarHeader: *header, } } - fileBytes := make([]byte, header.Size) - _, err := reader.Read(fileBytes) - if err != nil && err != io.EOF { - logrus.Panic(err) - } + + hash := getHashFromReader(reader) return FileInfo{ Path: path, TypeFlag: header.Typeflag, - MD5sum: md5.Sum(fileBytes), + hash: hash, TarHeader: *header, } } @@ -106,7 +124,7 @@ func (data *FileInfo) Copy() *FileInfo { return &FileInfo{ Path: data.Path, TypeFlag: data.TypeFlag, - MD5sum: data.MD5sum, + hash: data.hash, TarHeader: data.TarHeader, } } @@ -114,7 +132,7 @@ func (data *FileInfo) Copy() *FileInfo { // Compare determines the DiffType between two FileInfos based on the type and contents of each given FileInfo func (data *FileInfo) Compare(other FileInfo) DiffType { if data.TypeFlag == other.TypeFlag { - if bytes.Compare(data.MD5sum[:], other.MD5sum[:]) == 0 { + if data.hash == other.hash { return Unchanged } } diff --git a/filetree/data_test.go b/filetree/data_test.go index 7881ce7..fff79d8 100644 --- a/filetree/data_test.go +++ b/filetree/data_test.go @@ -35,7 +35,7 @@ func BlankFileChangeInfo(path string) (f *FileInfo) { result := FileInfo{ Path: path, TypeFlag: 1, - MD5sum: [16]byte{1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, + hash: 123, } return &result } diff --git a/filetree/tree_test.go b/filetree/tree_test.go index 04f8b37..5002dc6 100644 --- a/filetree/tree_test.go +++ b/filetree/tree_test.go @@ -263,7 +263,7 @@ func TestCompareWithNoChanges(t *testing.T) { fakeData := FileInfo{ Path: value, TypeFlag: 1, - MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + hash: 123, } lowerTree.AddPath(value, fakeData) upperTree.AddPath(value, fakeData) @@ -294,7 +294,7 @@ func TestCompareWithAdds(t *testing.T) { lowerTree.AddPath(value, FileInfo{ Path: value, TypeFlag: 1, - MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + hash: 123, }) } @@ -302,7 +302,7 @@ func TestCompareWithAdds(t *testing.T) { upperTree.AddPath(value, FileInfo{ Path: value, TypeFlag: 1, - MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + hash: 123, }) } @@ -354,12 +354,12 @@ func TestCompareWithChanges(t *testing.T) { lowerTree.AddPath(value, FileInfo{ Path: value, TypeFlag: 1, - MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + hash: 123, }) upperTree.AddPath(value, FileInfo{ Path: value, TypeFlag: 1, - MD5sum: [16]byte{1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, + hash: 456, }) } @@ -404,7 +404,7 @@ func TestCompareWithRemoves(t *testing.T) { fakeData := FileInfo{ Path: value, TypeFlag: 1, - MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + hash: 123, } lowerTree.AddPath(value, fakeData) } @@ -413,7 +413,7 @@ func TestCompareWithRemoves(t *testing.T) { fakeData := FileInfo{ Path: value, TypeFlag: 1, - MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + hash: 123, } upperTree.AddPath(value, fakeData) } @@ -474,7 +474,7 @@ func TestStackRange(t *testing.T) { fakeData := FileInfo{ Path: value, TypeFlag: 1, - MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + hash: 123, } lowerTree.AddPath(value, fakeData) } @@ -483,7 +483,7 @@ func TestStackRange(t *testing.T) { fakeData := FileInfo{ Path: value, TypeFlag: 1, - MD5sum: [16]byte{1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, + hash: 456, } upperTree.AddPath(value, fakeData) } @@ -500,7 +500,7 @@ func TestRemoveOnIterate(t *testing.T) { fakeData := FileInfo{ Path: value, TypeFlag: 1, - MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + hash: 123, } node, err := tree.AddPath(value, fakeData) if err == nil && stringInSlice(node.Path(), []string{"/etc"}) {