diff --git a/tests/test_blockhasher.py b/tests/test_blockhasher.py index f9e43d6..957d487 100644 --- a/tests/test_blockhasher.py +++ b/tests/test_blockhasher.py @@ -78,10 +78,12 @@ class TestBlockHasher(unittest2.TestCase): ]) def test_blockhash_compare(self): + #no errors block_hasher = BlockHasher(count=1) generator = block_hasher.generate("tests/data/whole_whole2_partial") self.assertEqual([], list(block_hasher.compare("tests/data/whole_whole2_partial", generator))) + #compare file is smaller (EOF errors) block_hasher = BlockHasher(count=1) generator = block_hasher.generate("tests/data/whole_whole2_partial") self.assertEqual( @@ -89,11 +91,12 @@ class TestBlockHasher(unittest2.TestCase): (2, '642027d63bb0afd7e0ba197f2c66ad03e3d70de1', 'EOF')], list(block_hasher.compare("tests/data/whole", generator))) + #no errors, huge chunks block_hasher = BlockHasher(count=10) generator = block_hasher.generate("tests/data/whole_whole2_partial") self.assertEqual([], list(block_hasher.compare("tests/data/whole_whole2_partial", generator))) - # different order to make sure seek functions + # different order to make sure seek functions are ok block_hasher = BlockHasher(count=1) checksums = list(block_hasher.generate("tests/data/whole_whole2_partial")) checksums.reverse() @@ -150,3 +153,5 @@ class TestBlockHasher(unittest2.TestCase): # (2, "642027d63bb0afd7e0ba197f2c66ad03e3d70de1") # partial ] ) + + #NOTE: compare doesnt use skip. 
thats the job of its input generator \ No newline at end of file diff --git a/tests/test_treehasher.py b/tests/test_treehasher.py index 77cbced..ea3cd04 100644 --- a/tests/test_treehasher.py +++ b/tests/test_treehasher.py @@ -26,10 +26,33 @@ class TestTreeHasher(unittest2.TestCase): shelltest("mknod /tmp/treehashertest/b b 1 1") shelltest("mkfifo /tmp/treehashertest/f") + + block_hasher = BlockHasher(count=1, skip=0) + tree_hasher = TreeHasher(block_hasher) + with self.subTest("Test output, count 1, skip 0"): + self.assertEqual(list(tree_hasher.generate("/tmp/treehashertest")), [ + ('whole', 0, '3c0bf91170d873b8e327d3bafb6bc074580d11b7'), + ('dir/whole_whole2_partial', 0, '3c0bf91170d873b8e327d3bafb6bc074580d11b7'), + ('dir/whole_whole2_partial', 1, '2e863f1fcccd6642e4e28453eba10d2d3f74d798'), + ('dir/whole_whole2_partial', 2, '642027d63bb0afd7e0ba197f2c66ad03e3d70de1') + ]) + + block_hasher = BlockHasher(count=1, skip=1) + tree_hasher = TreeHasher(block_hasher) + with self.subTest("Test output, count 1, skip 1"): + self.assertEqual(list(tree_hasher.generate("/tmp/treehashertest")), [ + ('whole', 0, '3c0bf91170d873b8e327d3bafb6bc074580d11b7'), + # ('dir/whole_whole2_partial', 0, '3c0bf91170d873b8e327d3bafb6bc074580d11b7'), + ('dir/whole_whole2_partial', 1, '2e863f1fcccd6642e4e28453eba10d2d3f74d798'), + # ('dir/whole_whole2_partial', 2, '642027d63bb0afd7e0ba197f2c66ad03e3d70de1') + ]) + + + block_hasher = BlockHasher(count=2) tree_hasher = TreeHasher(block_hasher) - with self.subTest("Test output"): + with self.subTest("Test output, count 2, skip 0"): self.assertEqual(list(tree_hasher.generate("/tmp/treehashertest")), [ ('whole', 0, '3c0bf91170d873b8e327d3bafb6bc074580d11b7'), ('dir/whole_whole2_partial', 0, '959e6b58078f0cfd2fb3d37e978fda51820473ff'), @@ -57,3 +80,5 @@ class TestTreeHasher(unittest2.TestCase): self.assertEqual(list(tree_hasher.compare("/tmp/treehashertest", generator)), [('whole', '-', '-', "ERROR: [Errno 2] No such file or directory: 
'/tmp/treehashertest/whole'")]) + + diff --git a/zfs_autobackup/BlockHasher.py b/zfs_autobackup/BlockHasher.py index 64a90ff..5d836bf 100644 --- a/zfs_autobackup/BlockHasher.py +++ b/zfs_autobackup/BlockHasher.py @@ -12,6 +12,9 @@ class BlockHasher(): Its also possible to only read a certain percentage of blocks to just check a sample. Input and output generators are in the format ( chunk_nr, hexdigest ) + + NOTE: skipping is only used on the generator side. The compare side just compares what it gets from the input generator. + """ def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1, skip=0): @@ -30,7 +33,10 @@ class BlockHasher(): def _seek_next_chunk(self, fh, fsize): """seek fh to next chunk and update skip counter. returns chunk_nr - return false it should skip the rest of the file""" + returns False if it should skip the rest of the file + + + """ #ignore rempty files if fsize==0: @@ -86,7 +92,7 @@ class BlockHasher(): yield (chunk_nr, hash.hexdigest()) - def compare(self, fname, generator): + def compare(self, fname, generator): """reads from generator and compares blocks Yields mismatches in the form: ( chunk_nr, hexdigest, actual_hexdigest) Yields errors in the form: ( chunk_nr, hexdigest, "message" )