incremental block hasher (for zfs-verify)

This commit is contained in:
Edwin Eefting 2022-02-20 12:59:43 +01:00
parent 3ca1bce9b2
commit 4d27b3b6ea
2 changed files with 105 additions and 0 deletions

66
tests/test_check.py Normal file
View File

@ -0,0 +1,66 @@
from basetest import *
from zfs_autobackup.util import *
class TestZfsEncryption(unittest2.TestCase):
    """Checks block_hash() against known sha1 sums of the files in tests/data."""

    def setUp(self):
        pass

    def test_blockhash(self):
        """Compare the chunks yielded by block_hash() with the reference digests."""
        # Reference sha1 sums of the fixture files (bs=4096).
        # The empty file hashes to da39a3ee5e6b4b0d3255bfef95601890afd80709,
        # but block_hash() is expected to yield nothing for it.
        partial = "642027d63bb0afd7e0ba197f2c66ad03e3d70de1"
        whole = "3c0bf91170d873b8e327d3bafb6bc074580d11b7"
        whole2 = "2e863f1fcccd6642e4e28453eba10d2d3f74d798"
        whole_whole2 = "959e6b58078f0cfd2fb3d37e978fda51820473ff"
        whole_whole2_partial = "309ffffba2e1977d12f3b7469971f30d28b94bd8"

        # (fixture path, blocks per chunk, expected list of (chunk_nr, digest))
        cases = [
            ("tests/data/empty", 1, []),
            ("tests/data/partial", 1, [(0, partial)]),
            ("tests/data/whole", 1, [(0, whole)]),
            ("tests/data/whole_whole2", 1, [(0, whole), (1, whole2)]),
            (
                "tests/data/whole_whole2_partial",
                1,
                [(0, whole), (1, whole2), (2, partial)],
            ),
            (
                "tests/data/whole_whole2_partial",
                2,
                [(0, whole_whole2), (1, partial)],
            ),
            (
                "tests/data/whole_whole2_partial",
                10,
                [(0, whole_whole2_partial)],
            ),
        ]

        for path, blocks_per_chunk, expected in cases:
            self.assertEqual(
                list(block_hash(path, count=blocks_per_chunk)),
                expected
            )

39
zfs_autobackup/util.py Normal file
View File

@ -0,0 +1,39 @@
import hashlib
# root@psyt14s:/home/psy/zfs_autobackup# ls -lh /home/psy/Downloads/carimage.zip
# -rw-rw-r-- 1 psy psy 990M Nov 26 2020 /home/psy/Downloads/carimage.zip
# root@psyt14s:/home/psy/zfs_autobackup# time sha1sum /home/psy/Downloads/carimage.zip
# a682e1a36e16fe0d0c2f011104f4a99004f19105 /home/psy/Downloads/carimage.zip
#
# real 0m2.558s
# user 0m2.105s
# sys 0m0.448s
# root@psyt14s:/home/psy/zfs_autobackup# time python3 -m zfs_autobackup.ZfsCheck
#
# real 0m1.459s
# user 0m0.993s
# sys 0m0.462s
# NOTE: surprisingly, sha1 via python3 is faster than the native sha1sum utility, even in the way we use it below!
def block_hash(fname, count=10000, bs=4096):
    """Yield a sha1 hash for every chunk of ``count`` blocks of ``bs`` bytes.

    The file is read sequentially in binary mode, ``bs`` bytes at a time, so
    it is never loaded into memory as a whole.

    Args:
        fname: path of the file to hash.
        count: number of blocks that make up one chunk; must be >= 1.
        bs: size in bytes of each block that is read; must be >= 1.

    Yields:
        ``(chunk_nr, hexdigest)`` tuples, with ``chunk_nr`` starting at 0.
        The final chunk may contain fewer than ``count`` blocks.
        Nothing is yielded for an empty file.

    Raises:
        ValueError: if ``count`` or ``bs`` is smaller than 1. Because this is
            a generator, the error surfaces when iteration starts.
    """
    if count < 1:
        raise ValueError("count must be at least 1, got %r" % (count,))
    if bs < 1:
        raise ValueError("bs must be at least 1, got %r" % (bs,))

    with open(fname, "rb") as f:
        hasher = hashlib.sha1()
        blocks_in_chunk = 0
        chunk_nr = 0

        # iter() with a sentinel keeps calling read() until it returns b"" (EOF).
        for block in iter(lambda: f.read(bs), b""):
            hasher.update(block)
            blocks_in_chunk += 1

            if blocks_in_chunk == count:
                yield (chunk_nr, hasher.hexdigest())
                chunk_nr += 1
                # Every chunk is hashed independently, so start a fresh digest.
                hasher = hashlib.sha1()
                blocks_in_chunk = 0

        # Yield the last (incomplete) chunk, if any data is left over.
        if blocks_in_chunk:
            yield (chunk_nr, hasher.hexdigest())