from basetest import *
from zfs_autobackup.util import *


class TestZfsEncryption(unittest2.TestCase):
    """Checks the per-chunk sha1 digests yielded by block_hash()."""

    def setUp(self):
        """Nothing to prepare: the test only reads fixture files in tests/data."""

    def test_blockhash(self):
        # sha1 sums of the fixture files (bs=4096). The file names describe
        # their layout: "whole_whole2_partial" is the "whole" block followed
        # by the "whole2" block followed by the "partial" block.
        # (sha1 of the empty file, for reference:
        #  da39a3ee5e6b4b0d3255bfef95601890afd80709 -- block_hash yields
        #  nothing at all for it, so that digest is never compared.)
        partial = "642027d63bb0afd7e0ba197f2c66ad03e3d70de1"
        whole = "3c0bf91170d873b8e327d3bafb6bc074580d11b7"
        whole2 = "2e863f1fcccd6642e4e28453eba10d2d3f74d798"
        whole_whole2 = "959e6b58078f0cfd2fb3d37e978fda51820473ff"
        whole_whole2_partial = "309ffffba2e1977d12f3b7469971f30d28b94bd8"

        # (fixture path, blocks per chunk, expected (chunk_nr, digest) list)
        cases = [
            ("tests/data/empty", 1, []),
            ("tests/data/partial", 1, [(0, partial)]),
            ("tests/data/whole", 1, [(0, whole)]),
            ("tests/data/whole_whole2", 1, [(0, whole), (1, whole2)]),
            (
                "tests/data/whole_whole2_partial",
                1,
                [(0, whole), (1, whole2), (2, partial)],
            ),
            (
                "tests/data/whole_whole2_partial",
                2,
                [(0, whole_whole2), (1, partial)],
            ),
            (
                "tests/data/whole_whole2_partial",
                10,
                [(0, whole_whole2_partial)],
            ),
        ]

        for path, blocks_per_chunk, expected in cases:
            self.assertEqual(
                list(block_hash(path, count=blocks_per_chunk)),
                expected
            )
import hashlib

# Benchmark, hashing a 990M file:
#
# root@psyt14s:/home/psy/zfs_autobackup# ls -lh /home/psy/Downloads/carimage.zip
# -rw-rw-r-- 1 psy psy 990M Nov 26  2020 /home/psy/Downloads/carimage.zip
# root@psyt14s:/home/psy/zfs_autobackup# time sha1sum /home/psy/Downloads/carimage.zip
# a682e1a36e16fe0d0c2f011104f4a99004f19105  /home/psy/Downloads/carimage.zip
#
# real    0m2.558s
# user    0m2.105s
# sys     0m0.448s
# root@psyt14s:/home/psy/zfs_autobackup# time python3 -m zfs_autobackup.ZfsCheck
#
# real    0m1.459s
# user    0m0.993s
# sys     0m0.462s

# NOTE: surprisingly, sha1 via python3 is faster than the native sha1sum
# utility, even in the way we use it below!
def block_hash(fname, count=10000, bs=4096):
    """Yield a sha1 hex digest for every chunk of ``count`` blocks of a file.

    The file is read in binary mode, ``bs`` bytes at a time. Every ``count``
    blocks form one chunk; each chunk is hashed independently with a fresh
    sha1 object.

    :param fname: path of the file to hash.
    :param count: number of blocks per chunk (expected to be a positive
        integer).
    :param bs: block size in bytes used for each read. Must be at least 1.
    :return: generator of ``(chunk_nr, hexdigest)`` tuples, ``chunk_nr``
        counting up from 0. A trailing chunk with fewer than ``count``
        blocks (or a short last block) is yielded as well. Nothing is
        yielded for an empty file.
    :raises ValueError: if ``bs`` is smaller than 1 (raised when iteration
        starts, since this is a generator).
    """

    # read(0) returns b"" immediately and would make every file look empty,
    # so refuse it instead of silently yielding nothing.
    if bs < 1:
        raise ValueError("bs must be at least 1, got %r" % (bs,))

    with open(fname, "rb") as f:
        sha = hashlib.sha1()
        block_nr = 0
        chunk_nr = 0
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for block in iter(lambda: f.read(bs), b""):
            sha.update(block)
            block_nr += 1
            if block_nr % count == 0:
                yield (chunk_nr, sha.hexdigest())
                chunk_nr += 1
                # each chunk gets its own independent digest
                sha = hashlib.sha1()

        # yield the last (incomplete) chunk, if any blocks are left over
        if block_nr % count != 0:
            yield (chunk_nr, sha.hexdigest())