now yields errors and mismatches

This commit is contained in:
Edwin Eefting 2022-02-22 14:47:15 +01:00
parent 3f755fcc69
commit 6a58a294a3
4 changed files with 74 additions and 66 deletions

View File

@ -1,6 +1,7 @@
from basetest import *
from zfs_autobackup.BlockHasher import BlockHasher
# make VERY sure this works correctly under all circumstances.
# sha1 sums of files, (bs=4096)
@ -11,13 +12,13 @@ from zfs_autobackup.BlockHasher import BlockHasher
# 959e6b58078f0cfd2fb3d37e978fda51820473ff whole_whole2
# 309ffffba2e1977d12f3b7469971f30d28b94bd8 whole_whole2_partial
class TestBlockHahser(unittest2.TestCase):
class TestBlockHasher(unittest2.TestCase):
def setUp(self):
pass
def test_empty(self):
block_hasher=BlockHasher(count=1)
block_hasher = BlockHasher(count=1)
self.assertEqual(
list(block_hasher.generate("tests/data/empty")),
[]
@ -52,9 +53,9 @@ class TestBlockHahser(unittest2.TestCase):
self.assertEqual(
list(block_hasher.generate("tests/data/whole_whole2_partial")),
[
(0, "3c0bf91170d873b8e327d3bafb6bc074580d11b7"), #whole
(1, "2e863f1fcccd6642e4e28453eba10d2d3f74d798"), #whole2
(2, "642027d63bb0afd7e0ba197f2c66ad03e3d70de1") #partial
(0, "3c0bf91170d873b8e327d3bafb6bc074580d11b7"), # whole
(1, "2e863f1fcccd6642e4e28453eba10d2d3f74d798"), # whole2
(2, "642027d63bb0afd7e0ba197f2c66ad03e3d70de1") # partial
]
)
@ -63,38 +64,37 @@ class TestBlockHahser(unittest2.TestCase):
self.assertEqual(
list(block_hasher.generate("tests/data/whole_whole2_partial")),
[
(0, "959e6b58078f0cfd2fb3d37e978fda51820473ff"), #whole_whole2
(1, "642027d63bb0afd7e0ba197f2c66ad03e3d70de1") #partial
(0, "959e6b58078f0cfd2fb3d37e978fda51820473ff"), # whole_whole2
(1, "642027d63bb0afd7e0ba197f2c66ad03e3d70de1") # partial
]
)
def test_big(self):
block_hasher=BlockHasher(count=10)
block_hasher = BlockHasher(count=10)
self.assertEqual(
list(block_hasher.generate("tests/data/whole_whole2_partial")),
[
(0, "309ffffba2e1977d12f3b7469971f30d28b94bd8"), #whole_whole2_partial
(0, "309ffffba2e1977d12f3b7469971f30d28b94bd8"), # whole_whole2_partial
])
def test_blockhash_compare(self):
block_hasher = BlockHasher(count=1)
generator = block_hasher.generate("tests/data/whole_whole2_partial")
self.assertEqual([], list(block_hasher.compare("tests/data/whole_whole2_partial", generator)))
block_hasher = BlockHasher(count=1)
generator = block_hasher.generate("tests/data/whole_whole2_partial")
self.assertEqual(
[(1, '2e863f1fcccd6642e4e28453eba10d2d3f74d798', 'EOF'),
(2, '642027d63bb0afd7e0ba197f2c66ad03e3d70de1', 'EOF')],
list(block_hasher.compare("tests/data/whole", generator)))
block_hasher=BlockHasher(count=1)
generator=block_hasher.generate("tests/data/whole_whole2_partial")
self.assertEqual(3,block_hasher.compare("tests/data/whole_whole2_partial", generator))
block_hasher = BlockHasher(count=10)
generator = block_hasher.generate("tests/data/whole_whole2_partial")
self.assertEqual([], list(block_hasher.compare("tests/data/whole_whole2_partial", generator)))
block_hasher=BlockHasher(count=1)
with self.assertRaisesRegexp(Exception, "^Block 1 mismatched!"):
generator=block_hasher.generate("tests/data/whole_whole2_partial")
self.assertEqual(3,block_hasher.compare("tests/data/whole", generator))
block_hasher=BlockHasher(count=10)
generator=block_hasher.generate("tests/data/whole_whole2_partial")
self.assertEqual(1,block_hasher.compare("tests/data/whole_whole2_partial", generator))
#different order to make sure seek functions
block_hasher=BlockHasher(count=1)
checksums=list(block_hasher.generate("tests/data/whole_whole2_partial"))
# different order to make sure seeking works
block_hasher = BlockHasher(count=1)
checksums = list(block_hasher.generate("tests/data/whole_whole2_partial"))
checksums.reverse()
self.assertEqual(3,block_hasher.compare("tests/data/whole_whole2_partial", checksums))
self.assertEqual([], list(block_hasher.compare("tests/data/whole_whole2_partial", checksums)))

View File

@ -1,6 +1,7 @@
from basetest import *
from zfs_autobackup.BlockHasher import BlockHasher
# sha1 sums of files, (bs=4096)
# da39a3ee5e6b4b0d3255bfef95601890afd80709 empty
# 642027d63bb0afd7e0ba197f2c66ad03e3d70de1 partial
@ -12,47 +13,47 @@ from zfs_autobackup.BlockHasher import BlockHasher
class TestTreeHasher(unittest2.TestCase):
def test_treehasher(self):
shelltest("rm -rf /tmp/treehashertest; mkdir /tmp/treehashertest")
shelltest("cp tests/data/whole /tmp/treehashertest")
shelltest("mkdir /tmp/treehashertest/emptydir")
shelltest("mkdir /tmp/treehashertest/dir")
shelltest("cp tests/data/whole_whole2_partial /tmp/treehashertest/dir")
#it should ignore these:
# it should ignore these:
shelltest("ln -s / /tmp/treehashertest/symlink")
shelltest("mknod /tmp/treehashertest/c c 1 1")
shelltest("mknod /tmp/treehashertest/b b 1 1")
shelltest("mkfifo /tmp/treehashertest/f")
block_hasher=BlockHasher(count=2)
tree_hasher=TreeHasher(block_hasher)
block_hasher = BlockHasher(count=2)
tree_hasher = TreeHasher(block_hasher)
with self.subTest("Test output"):
self.assertEqual(list(tree_hasher.generate("/tmp/treehashertest")),[
self.assertEqual(list(tree_hasher.generate("/tmp/treehashertest")), [
('whole', 0, '3c0bf91170d873b8e327d3bafb6bc074580d11b7'),
('dir/whole_whole2_partial', 0, '959e6b58078f0cfd2fb3d37e978fda51820473ff'),
('dir/whole_whole2_partial', 1, '642027d63bb0afd7e0ba197f2c66ad03e3d70de1')
])
with self.subTest("Test compare"):
generator=tree_hasher.generate("/tmp/treehashertest")
count=tree_hasher.compare("/tmp/treehashertest", generator)
self.assertEqual(count,2)
generator = tree_hasher.generate("/tmp/treehashertest")
errors = list(tree_hasher.compare("/tmp/treehashertest", generator))
self.assertEqual(errors, [])
with self.subTest("Test mismatch"):
generator=list(tree_hasher.generate("/tmp/treehashertest"))
generator = list(tree_hasher.generate("/tmp/treehashertest"))
shelltest("cp tests/data/whole2 /tmp/treehashertest/whole")
with self.assertRaisesRegex(Exception,"mismatch"):
tree_hasher.compare("/tmp/treehashertest", generator)
self.assertEqual(list(tree_hasher.compare("/tmp/treehashertest", generator)),
[('whole',
0,
'3c0bf91170d873b8e327d3bafb6bc074580d11b7',
'2e863f1fcccd6642e4e28453eba10d2d3f74d798')])
with self.subTest("Test missig file compare"):
generator=list(tree_hasher.generate("/tmp/treehashertest"))
with self.subTest("Test missing file compare"):
generator = list(tree_hasher.generate("/tmp/treehashertest"))
shelltest("rm /tmp/treehashertest/whole")
with self.assertRaises(Exception):
tree_hasher.compare("/tmp/treehashertest", generator)
self.assertEqual(list(tree_hasher.compare("/tmp/treehashertest", generator)),
[('whole', '-', '-', "ERROR: [Errno 2] No such file or directory: 'whole'")])

View File

@ -9,11 +9,11 @@ class BlockHasher():
It's also possible to only read a certain percentage of blocks to just check a sample.
"""
def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1):
self.count=count
self.bs=bs
self.hash_class=hash_class
def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1):
self.count = count
self.bs = bs
self.hash_class = hash_class
def generate(self, fname):
"""Generates checksums
@ -39,25 +39,32 @@ class BlockHasher():
yield (chunk_nr, hash.hexdigest())
def compare(self, fname, generator):
"""reads from generator and compares blocks, raises exception on error"""
"""reads from generator and compares blocks, yields mismatches"""
checked=0
with open(fname, "rb") as f:
for ( chunk_nr, hexdigest ) in generator:
# print ("comparing {} {} {}".format(fname, chunk_nr, hexdigest))
try:
checked = 0
with open(fname, "rb") as f:
for (chunk_nr, hexdigest) in generator:
try:
checked=checked+1
hash = self.hash_class()
f.seek(chunk_nr * self.bs * self.count)
block_nr=0
for block in iter(lambda: f.read(self.bs), b""):
hash.update(block)
block_nr=block_nr+1
if block_nr == self.count:
break
checked = checked + 1
hash = self.hash_class()
f.seek(chunk_nr * self.bs * self.count)
block_nr = 0
for block in iter(lambda: f.read(self.bs), b""):
hash.update(block)
block_nr = block_nr + 1
if block_nr == self.count:
break
if (hash.hexdigest()!=hexdigest):
raise Exception("Block {} mismatched! Hash is {}, but should be {}".format(chunk_nr, hash.hexdigest(), hexdigest))
if block_nr == 0:
yield (chunk_nr, hexdigest, 'EOF')
return checked
elif (hash.hexdigest() != hexdigest):
yield (chunk_nr, hexdigest, hash.hexdigest())
except Exception as e:
yield ( chunk_nr , hexdigest, 'ERROR: '+str(e))
except Exception as e:
yield ( '-', '-', 'ERROR: '+ str(e))

View File

@ -53,11 +53,11 @@ class TreeHasher():
for file_name, group_generator in itertools.groupby(generator, lambda x: x[0]):
count=count+1
block_generator=itertools.starmap(filter_file_name, group_generator)
self.block_hasher.compare(file_name, block_generator)
for ( chunk_nr, compare_hexdigest, actual_hexdigest) in self.block_hasher.compare(file_name, block_generator):
yield ( file_name, chunk_nr, compare_hexdigest, actual_hexdigest )
finally:
os.chdir(cwd)
return count