From bcc7983492dde927bf184938aba60f995e1df8b7 Mon Sep 17 00:00:00 2001 From: Edwin Eefting Date: Mon, 21 Feb 2022 17:51:23 +0100 Subject: [PATCH] tree compare --- zfs_autobackup/BlockHasher.py | 10 +++++++-- zfs_autobackup/TreeHasher.py | 38 +++++++++++++++++++++++++++++++++++ zfs_autobackup/ZfsCheck.py | 20 ++++++++++++++++++ 3 files changed, 66 insertions(+), 2 deletions(-) diff --git a/zfs_autobackup/BlockHasher.py b/zfs_autobackup/BlockHasher.py index 487cd86..22e2b61 100644 --- a/zfs_autobackup/BlockHasher.py +++ b/zfs_autobackup/BlockHasher.py @@ -44,11 +44,17 @@ class BlockHasher(): checked=0 with open(fname, "rb") as f: for ( chunk_nr, hexdigest ) in generator: + print ("comparing {} {} {}".format(fname, chunk_nr, hexdigest)) + checked=checked+1 hash = self.hash_class() f.seek(chunk_nr * self.bs * self.count) - for block_nr in range(0,self.count): - hash.update(f.read(self.bs)) + block_nr=0 + for block in iter(lambda: f.read(self.bs), b""): + hash.update(block) + block_nr=block_nr+1 + if block_nr == self.count: + break if (hash.hexdigest()!=hexdigest): raise Exception("Block {} mismatched! Hash is {}, but should be {}".format(chunk_nr, hash.hexdigest(), hexdigest)) diff --git a/zfs_autobackup/TreeHasher.py b/zfs_autobackup/TreeHasher.py index 1241255..e976bdf 100644 --- a/zfs_autobackup/TreeHasher.py +++ b/zfs_autobackup/TreeHasher.py @@ -5,6 +5,10 @@ class TreeHasher(): """uses BlockHasher recursively on a directory tree""" def __init__(self, block_hasher): + """ + + :type block_hasher: BlockHasher + """ self.block_hasher=block_hasher def generate(self, start_path): @@ -31,3 +35,37 @@ class TreeHasher(): finally: os.chdir(cwd) + + def compare(self, start_path, generator): + """reads from generator and compares blocks, raises exception on error + """ + + cwd=os.getcwd() + os.chdir(start_path) + + try: + current= [None] + def per_file_generator(): + + (current_file_path, chunk_nr, hash)=current[0] + yield ( chunk_nr, hash) + + for (file_path, chunk_nr, hash) in generator: + if file_path==current_file_path: + yield ( chunk_nr, hash) + else: + current[0] = (file_path, chunk_nr, hash) + return + + current[0]=None + + + current[0] = generator.next() + while current[0] is not None: + self.block_hasher.compare(current[0][0], per_file_generator()) + + finally: + os.chdir(cwd) + + + diff --git a/zfs_autobackup/ZfsCheck.py b/zfs_autobackup/ZfsCheck.py index 283762c..cf54d66 100644 --- a/zfs_autobackup/ZfsCheck.py +++ b/zfs_autobackup/ZfsCheck.py @@ -68,6 +68,11 @@ class ZfsCheck(CliBase): self.debug("Hashing tree: {}".format(mnt)) if not self.args.test: + + # generator=tree_hasher.generate(mnt) + # tree_hasher.compare(mnt, generator) + + for (file, block, hash) in tree_hasher.generate(mnt): print("{}\t{}\t{}".format(file, block, hash)) sys.stdout.flush() #important, to generate SIGPIPES on ssh disconnect @@ -158,6 +163,21 @@ def cli(): sys.exit(ZfsCheck(sys.argv[1:], False).run()) + # block_hasher=BlockHasher() + + # if sys.argv[1]=="s": + # for ( fname, nr, hash ) in TreeHasher(block_hasher).generate("/usr/src/linux-headers-5.14.14-051414"): + # print("{}\t{}\t{}".format(fname, nr, hash)) + # + # if sys.argv[1]=="r": + # + # def gen(): + # for line in sys.stdin: + # ( fname, nr, hash)=line.rstrip().split('\t') + # yield (fname, int(nr), hash) + # + # TreeHasher(block_hasher).compare("/usr/src/linux-headers-5.14.14-051414", gen()) + if __name__ == "__main__":