forked from third-party-mirrors/zfs_autobackup
70 lines
2.4 KiB
Python
70 lines
2.4 KiB
Python
import hashlib
|
|
|
|
|
|
class BlockHasher():
|
|
"""This class was created to checksum huge files and blockdevices (TB's)
|
|
Instead of one sha1sum of the whole file, it generates sha1susms of chunks of the file.
|
|
|
|
The chunksize is count*bs (bs is the read blocksize from disk)
|
|
|
|
Its also possible to only read a certain percentage of blocks to just check a sample.
|
|
"""
|
|
|
|
def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1):
|
|
self.count = count
|
|
self.bs = bs
|
|
self.hash_class = hash_class
|
|
|
|
def generate(self, fname):
|
|
"""Generates checksums
|
|
|
|
yields(chunk_nr, hexdigest)
|
|
|
|
yields nothing for empty files.
|
|
"""
|
|
with open(fname, "rb") as f:
|
|
hash = self.hash_class()
|
|
block_nr = 0
|
|
chunk_nr = 0
|
|
for block in iter(lambda: f.read(self.bs), b""):
|
|
hash.update(block)
|
|
block_nr = block_nr + 1
|
|
if block_nr % self.count == 0:
|
|
yield (chunk_nr, hash.hexdigest())
|
|
chunk_nr = chunk_nr + 1
|
|
hash = self.hash_class()
|
|
|
|
# yield last (incomplete) block
|
|
if block_nr % self.count != 0:
|
|
yield (chunk_nr, hash.hexdigest())
|
|
|
|
def compare(self, fname, generator):
|
|
"""reads from generator and compares blocks, yields mismatches"""
|
|
|
|
try:
|
|
checked = 0
|
|
with open(fname, "rb") as f:
|
|
for (chunk_nr, hexdigest) in generator:
|
|
try:
|
|
|
|
checked = checked + 1
|
|
hash = self.hash_class()
|
|
f.seek(chunk_nr * self.bs * self.count)
|
|
block_nr = 0
|
|
for block in iter(lambda: f.read(self.bs), b""):
|
|
hash.update(block)
|
|
block_nr = block_nr + 1
|
|
if block_nr == self.count:
|
|
break
|
|
|
|
if block_nr == 0:
|
|
yield (chunk_nr, hexdigest, 'EOF')
|
|
|
|
elif (hash.hexdigest() != hexdigest):
|
|
yield (chunk_nr, hexdigest, hash.hexdigest())
|
|
|
|
except Exception as e:
|
|
yield ( chunk_nr , hexdigest, 'ERROR: '+str(e))
|
|
|
|
except Exception as e:
|
|
yield ( '-', '-', 'ERROR: '+ str(e)) |