From 6a58a294a3455db9ef4f642123a85bb22f763575 Mon Sep 17 00:00:00 2001 From: Edwin Eefting Date: Tue, 22 Feb 2022 14:47:15 +0100 Subject: [PATCH] now yields errors and mismatches --- tests/test_blockhasher.py | 52 +++++++++++++++++------------------ tests/test_treehasher.py | 35 +++++++++++------------ zfs_autobackup/BlockHasher.py | 49 +++++++++++++++++++-------------- zfs_autobackup/TreeHasher.py | 4 +-- 4 files changed, 74 insertions(+), 66 deletions(-) diff --git a/tests/test_blockhasher.py b/tests/test_blockhasher.py index e8725ea..ba57b72 100644 --- a/tests/test_blockhasher.py +++ b/tests/test_blockhasher.py @@ -1,6 +1,7 @@ from basetest import * from zfs_autobackup.BlockHasher import BlockHasher + # make VERY sure this works correctly under all circumstances. # sha1 sums of files, (bs=4096) @@ -11,13 +12,13 @@ from zfs_autobackup.BlockHasher import BlockHasher # 959e6b58078f0cfd2fb3d37e978fda51820473ff whole_whole2 # 309ffffba2e1977d12f3b7469971f30d28b94bd8 whole_whole2_partial -class TestBlockHahser(unittest2.TestCase): +class TestBlockHasher(unittest2.TestCase): def setUp(self): pass def test_empty(self): - block_hasher=BlockHasher(count=1) + block_hasher = BlockHasher(count=1) self.assertEqual( list(block_hasher.generate("tests/data/empty")), [] @@ -52,9 +53,9 @@ class TestBlockHahser(unittest2.TestCase): self.assertEqual( list(block_hasher.generate("tests/data/whole_whole2_partial")), [ - (0, "3c0bf91170d873b8e327d3bafb6bc074580d11b7"), #whole - (1, "2e863f1fcccd6642e4e28453eba10d2d3f74d798"), #whole2 - (2, "642027d63bb0afd7e0ba197f2c66ad03e3d70de1") #partial + (0, "3c0bf91170d873b8e327d3bafb6bc074580d11b7"), # whole + (1, "2e863f1fcccd6642e4e28453eba10d2d3f74d798"), # whole2 + (2, "642027d63bb0afd7e0ba197f2c66ad03e3d70de1") # partial ] ) @@ -63,38 +64,37 @@ class TestBlockHahser(unittest2.TestCase): self.assertEqual( list(block_hasher.generate("tests/data/whole_whole2_partial")), [ - (0, "959e6b58078f0cfd2fb3d37e978fda51820473ff"), #whole_whole2 - (1, "642027d63bb0afd7e0ba197f2c66ad03e3d70de1") #partial + (0, "959e6b58078f0cfd2fb3d37e978fda51820473ff"), # whole_whole2 + (1, "642027d63bb0afd7e0ba197f2c66ad03e3d70de1") # partial ] ) def test_big(self): - block_hasher=BlockHasher(count=10) + block_hasher = BlockHasher(count=10) self.assertEqual( list(block_hasher.generate("tests/data/whole_whole2_partial")), [ - (0, "309ffffba2e1977d12f3b7469971f30d28b94bd8"), #whole_whole2_partial + (0, "309ffffba2e1977d12f3b7469971f30d28b94bd8"), # whole_whole2_partial ]) def test_blockhash_compare(self): + block_hasher = BlockHasher(count=1) + generator = block_hasher.generate("tests/data/whole_whole2_partial") + self.assertEqual([], list(block_hasher.compare("tests/data/whole_whole2_partial", generator))) + block_hasher = BlockHasher(count=1) + generator = block_hasher.generate("tests/data/whole_whole2_partial") + self.assertEqual( + [(1, '2e863f1fcccd6642e4e28453eba10d2d3f74d798', 'EOF'), + (2, '642027d63bb0afd7e0ba197f2c66ad03e3d70de1', 'EOF')], + list(block_hasher.compare("tests/data/whole", generator))) - block_hasher=BlockHasher(count=1) - generator=block_hasher.generate("tests/data/whole_whole2_partial") - self.assertEqual(3,block_hasher.compare("tests/data/whole_whole2_partial", generator)) + block_hasher = BlockHasher(count=10) + generator = block_hasher.generate("tests/data/whole_whole2_partial") + self.assertEqual([], list(block_hasher.compare("tests/data/whole_whole2_partial", generator))) - block_hasher=BlockHasher(count=1) - with self.assertRaisesRegexp(Exception, "^Block 1 mismatched!"): - generator=block_hasher.generate("tests/data/whole_whole2_partial") - self.assertEqual(3,block_hasher.compare("tests/data/whole", generator)) - - block_hasher=BlockHasher(count=10) - generator=block_hasher.generate("tests/data/whole_whole2_partial") - self.assertEqual(1,block_hasher.compare("tests/data/whole_whole2_partial", generator)) - - #different order to make sure seek functions - block_hasher=BlockHasher(count=1) - checksums=list(block_hasher.generate("tests/data/whole_whole2_partial")) + # different order to make sure seek functions + block_hasher = BlockHasher(count=1) + checksums = list(block_hasher.generate("tests/data/whole_whole2_partial")) checksums.reverse() - self.assertEqual(3,block_hasher.compare("tests/data/whole_whole2_partial", checksums)) - + self.assertEqual([], list(block_hasher.compare("tests/data/whole_whole2_partial", checksums))) diff --git a/tests/test_treehasher.py b/tests/test_treehasher.py index 888df2c..33b04c0 100644 --- a/tests/test_treehasher.py +++ b/tests/test_treehasher.py @@ -1,6 +1,7 @@ from basetest import * from zfs_autobackup.BlockHasher import BlockHasher + # sha1 sums of files, (bs=4096) # da39a3ee5e6b4b0d3255bfef95601890afd80709 empty # 642027d63bb0afd7e0ba197f2c66ad03e3d70de1 partial @@ -12,47 +13,47 @@ from zfs_autobackup.BlockHasher import BlockHasher class TestTreeHasher(unittest2.TestCase): - - def test_treehasher(self): - shelltest("rm -rf /tmp/treehashertest; mkdir /tmp/treehashertest") shelltest("cp tests/data/whole /tmp/treehashertest") shelltest("mkdir /tmp/treehashertest/emptydir") shelltest("mkdir /tmp/treehashertest/dir") shelltest("cp tests/data/whole_whole2_partial /tmp/treehashertest/dir") - #it should ignore these: + # it should ignore these: shelltest("ln -s / /tmp/treehashertest/symlink") shelltest("mknod /tmp/treehashertest/c c 1 1") shelltest("mknod /tmp/treehashertest/b b 1 1") shelltest("mkfifo /tmp/treehashertest/f") - block_hasher=BlockHasher(count=2) - tree_hasher=TreeHasher(block_hasher) + block_hasher = BlockHasher(count=2) + tree_hasher = TreeHasher(block_hasher) with self.subTest("Test output"): - self.assertEqual(list(tree_hasher.generate("/tmp/treehashertest")),[ + self.assertEqual(list(tree_hasher.generate("/tmp/treehashertest")), [ ('whole', 0, '3c0bf91170d873b8e327d3bafb6bc074580d11b7'), ('dir/whole_whole2_partial', 0, '959e6b58078f0cfd2fb3d37e978fda51820473ff'), ('dir/whole_whole2_partial', 1, '642027d63bb0afd7e0ba197f2c66ad03e3d70de1') ]) with self.subTest("Test compare"): - generator=tree_hasher.generate("/tmp/treehashertest") - count=tree_hasher.compare("/tmp/treehashertest", generator) - self.assertEqual(count,2) + generator = tree_hasher.generate("/tmp/treehashertest") + errors = list(tree_hasher.compare("/tmp/treehashertest", generator)) + self.assertEqual(errors, []) with self.subTest("Test mismatch"): - generator=list(tree_hasher.generate("/tmp/treehashertest")) + generator = list(tree_hasher.generate("/tmp/treehashertest")) shelltest("cp tests/data/whole2 /tmp/treehashertest/whole") - with self.assertRaisesRegex(Exception,"mismatch"): - tree_hasher.compare("/tmp/treehashertest", generator) + self.assertEqual(list(tree_hasher.compare("/tmp/treehashertest", generator)), + [('whole', + 0, + '3c0bf91170d873b8e327d3bafb6bc074580d11b7', + '2e863f1fcccd6642e4e28453eba10d2d3f74d798')]) - with self.subTest("Test missig file compare"): - generator=list(tree_hasher.generate("/tmp/treehashertest")) + with self.subTest("Test missing file compare"): + generator = list(tree_hasher.generate("/tmp/treehashertest")) shelltest("rm /tmp/treehashertest/whole") - with self.assertRaises(Exception): - tree_hasher.compare("/tmp/treehashertest", generator) + self.assertEqual(list(tree_hasher.compare("/tmp/treehashertest", generator)), + [('whole', '-', '-', "ERROR: [Errno 2] No such file or directory: 'whole'")]) diff --git a/zfs_autobackup/BlockHasher.py b/zfs_autobackup/BlockHasher.py index faf596f..199ba5b 100644 --- a/zfs_autobackup/BlockHasher.py +++ b/zfs_autobackup/BlockHasher.py @@ -9,11 +9,11 @@ class BlockHasher(): Its also possible to only read a certain percentage of blocks to just check a sample. """ - def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1): - self.count=count - self.bs=bs - self.hash_class=hash_class + def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1): + self.count = count + self.bs = bs + self.hash_class = hash_class def generate(self, fname): """Generates checksums @@ -39,25 +39,32 @@ class BlockHasher(): yield (chunk_nr, hash.hexdigest()) def compare(self, fname, generator): - """reads from generator and compares blocks, raises exception on error""" + """reads from generator and compares blocks, yields mismatches""" - checked=0 - with open(fname, "rb") as f: - for ( chunk_nr, hexdigest ) in generator: - # print ("comparing {} {} {}".format(fname, chunk_nr, hexdigest)) + try: + checked = 0 + with open(fname, "rb") as f: + for (chunk_nr, hexdigest) in generator: + try: - checked=checked+1 - hash = self.hash_class() - f.seek(chunk_nr * self.bs * self.count) - block_nr=0 - for block in iter(lambda: f.read(self.bs), b""): - hash.update(block) - block_nr=block_nr+1 - if block_nr == self.count: - break + checked = checked + 1 + hash = self.hash_class() + f.seek(chunk_nr * self.bs * self.count) + block_nr = 0 + for block in iter(lambda: f.read(self.bs), b""): + hash.update(block) + block_nr = block_nr + 1 + if block_nr == self.count: + break - if (hash.hexdigest()!=hexdigest): - raise Exception("Block {} mismatched! Hash is {}, but should be {}".format(chunk_nr, hash.hexdigest(), hexdigest)) + if block_nr == 0: + yield (chunk_nr, hexdigest, 'EOF') - return checked + elif (hash.hexdigest() != hexdigest): + yield (chunk_nr, hexdigest, hash.hexdigest()) + except Exception as e: + yield ( chunk_nr , hexdigest, 'ERROR: '+str(e)) + + except Exception as e: + yield ( '-', '-', 'ERROR: '+ str(e)) \ No newline at end of file diff --git a/zfs_autobackup/TreeHasher.py b/zfs_autobackup/TreeHasher.py index 54bec1e..c243d08 100644 --- a/zfs_autobackup/TreeHasher.py +++ b/zfs_autobackup/TreeHasher.py @@ -53,11 +53,11 @@ class TreeHasher(): for file_name, group_generator in itertools.groupby(generator, lambda x: x[0]): count=count+1 block_generator=itertools.starmap(filter_file_name, group_generator) - self.block_hasher.compare(file_name, block_generator) + for ( chunk_nr, compare_hexdigest, actual_hexdigest) in self.block_hasher.compare(file_name, block_generator): + yield ( file_name, chunk_nr, compare_hexdigest, actual_hexdigest ) finally: os.chdir(cwd) - return count