Accelerate file hashing with xxhash (#102)
This commit is contained in:
parent
bf8dde78d9
commit
530ea555cb
@ -2,12 +2,12 @@ package filetree
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"crypto/md5"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/cespare/xxhash"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/spf13/viper"
|
||||
"io"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -34,7 +34,7 @@ type ViewInfo struct {
|
||||
type FileInfo struct {
|
||||
Path string
|
||||
TypeFlag byte
|
||||
MD5sum [16]byte
|
||||
hash uint64
|
||||
TarHeader tar.Header
|
||||
}
|
||||
|
||||
@ -74,26 +74,44 @@ func (view *ViewInfo) Copy() (newView *ViewInfo) {
|
||||
return newView
|
||||
}
|
||||
|
||||
var chuckSize = 2 * 1024 * 1024
|
||||
|
||||
func getHashFromReader(reader io.Reader) uint64 {
|
||||
h := xxhash.New()
|
||||
|
||||
buf := make([]byte, chuckSize)
|
||||
for {
|
||||
n, err := reader.Read(buf)
|
||||
if err != nil && err != io.EOF {
|
||||
logrus.Panic(err)
|
||||
}
|
||||
if n == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
h.Write(buf[:n])
|
||||
}
|
||||
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
// NewFileInfo extracts the metadata from a tar header and file contents and generates a new FileInfo object.
|
||||
func NewFileInfo(reader *tar.Reader, header *tar.Header, path string) FileInfo {
|
||||
if header.Typeflag == tar.TypeDir {
|
||||
return FileInfo{
|
||||
Path: path,
|
||||
TypeFlag: header.Typeflag,
|
||||
MD5sum: [16]byte{},
|
||||
hash: 0,
|
||||
TarHeader: *header,
|
||||
}
|
||||
}
|
||||
fileBytes := make([]byte, header.Size)
|
||||
_, err := reader.Read(fileBytes)
|
||||
if err != nil && err != io.EOF {
|
||||
logrus.Panic(err)
|
||||
}
|
||||
|
||||
hash := getHashFromReader(reader)
|
||||
|
||||
return FileInfo{
|
||||
Path: path,
|
||||
TypeFlag: header.Typeflag,
|
||||
MD5sum: md5.Sum(fileBytes),
|
||||
hash: hash,
|
||||
TarHeader: *header,
|
||||
}
|
||||
}
|
||||
@ -106,7 +124,7 @@ func (data *FileInfo) Copy() *FileInfo {
|
||||
return &FileInfo{
|
||||
Path: data.Path,
|
||||
TypeFlag: data.TypeFlag,
|
||||
MD5sum: data.MD5sum,
|
||||
hash: data.hash,
|
||||
TarHeader: data.TarHeader,
|
||||
}
|
||||
}
|
||||
@ -114,7 +132,7 @@ func (data *FileInfo) Copy() *FileInfo {
|
||||
// Compare determines the DiffType between two FileInfos based on the type and contents of each given FileInfo
|
||||
func (data *FileInfo) Compare(other FileInfo) DiffType {
|
||||
if data.TypeFlag == other.TypeFlag {
|
||||
if bytes.Compare(data.MD5sum[:], other.MD5sum[:]) == 0 {
|
||||
if data.hash == other.hash {
|
||||
return Unchanged
|
||||
}
|
||||
}
|
||||
|
@ -35,7 +35,7 @@ func BlankFileChangeInfo(path string) (f *FileInfo) {
|
||||
result := FileInfo{
|
||||
Path: path,
|
||||
TypeFlag: 1,
|
||||
MD5sum: [16]byte{1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0},
|
||||
hash: 123,
|
||||
}
|
||||
return &result
|
||||
}
|
||||
|
@ -263,7 +263,7 @@ func TestCompareWithNoChanges(t *testing.T) {
|
||||
fakeData := FileInfo{
|
||||
Path: value,
|
||||
TypeFlag: 1,
|
||||
MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
hash: 123,
|
||||
}
|
||||
lowerTree.AddPath(value, fakeData)
|
||||
upperTree.AddPath(value, fakeData)
|
||||
@ -294,7 +294,7 @@ func TestCompareWithAdds(t *testing.T) {
|
||||
lowerTree.AddPath(value, FileInfo{
|
||||
Path: value,
|
||||
TypeFlag: 1,
|
||||
MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
hash: 123,
|
||||
})
|
||||
}
|
||||
|
||||
@ -302,7 +302,7 @@ func TestCompareWithAdds(t *testing.T) {
|
||||
upperTree.AddPath(value, FileInfo{
|
||||
Path: value,
|
||||
TypeFlag: 1,
|
||||
MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
hash: 123,
|
||||
})
|
||||
}
|
||||
|
||||
@ -354,12 +354,12 @@ func TestCompareWithChanges(t *testing.T) {
|
||||
lowerTree.AddPath(value, FileInfo{
|
||||
Path: value,
|
||||
TypeFlag: 1,
|
||||
MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
hash: 123,
|
||||
})
|
||||
upperTree.AddPath(value, FileInfo{
|
||||
Path: value,
|
||||
TypeFlag: 1,
|
||||
MD5sum: [16]byte{1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0},
|
||||
hash: 456,
|
||||
})
|
||||
}
|
||||
|
||||
@ -404,7 +404,7 @@ func TestCompareWithRemoves(t *testing.T) {
|
||||
fakeData := FileInfo{
|
||||
Path: value,
|
||||
TypeFlag: 1,
|
||||
MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
hash: 123,
|
||||
}
|
||||
lowerTree.AddPath(value, fakeData)
|
||||
}
|
||||
@ -413,7 +413,7 @@ func TestCompareWithRemoves(t *testing.T) {
|
||||
fakeData := FileInfo{
|
||||
Path: value,
|
||||
TypeFlag: 1,
|
||||
MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
hash: 123,
|
||||
}
|
||||
upperTree.AddPath(value, fakeData)
|
||||
}
|
||||
@ -474,7 +474,7 @@ func TestStackRange(t *testing.T) {
|
||||
fakeData := FileInfo{
|
||||
Path: value,
|
||||
TypeFlag: 1,
|
||||
MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
hash: 123,
|
||||
}
|
||||
lowerTree.AddPath(value, fakeData)
|
||||
}
|
||||
@ -483,7 +483,7 @@ func TestStackRange(t *testing.T) {
|
||||
fakeData := FileInfo{
|
||||
Path: value,
|
||||
TypeFlag: 1,
|
||||
MD5sum: [16]byte{1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0},
|
||||
hash: 456,
|
||||
}
|
||||
upperTree.AddPath(value, fakeData)
|
||||
}
|
||||
@ -500,7 +500,7 @@ func TestRemoveOnIterate(t *testing.T) {
|
||||
fakeData := FileInfo{
|
||||
Path: value,
|
||||
TypeFlag: 1,
|
||||
MD5sum: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
hash: 123,
|
||||
}
|
||||
node, err := tree.AddPath(value, fakeData)
|
||||
if err == nil && stringInSlice(node.Path(), []string{"/etc"}) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user