From b60dd4c1096090675c98c0d24c15aa33e6e4ba74 Mon Sep 17 00:00:00 2001 From: Edwin Eefting Date: Mon, 21 Feb 2022 00:46:54 +0100 Subject: [PATCH] wip (will usse zfs-check to do actual hashing) --- tests/test_verify.py | 4 +- zfs_autobackup/ZfsAutoverify.py | 255 ++++++++++++-------------------- 2 files changed, 100 insertions(+), 159 deletions(-) diff --git a/tests/test_verify.py b/tests/test_verify.py index 668751c..a1c80f2 100644 --- a/tests/test_verify.py +++ b/tests/test_verify.py @@ -16,10 +16,12 @@ from basetest import * # - test all directions (local, remote/local, local/remote, remote/remote) # -class TestZfsEncryption(unittest2.TestCase): +class TestZfsVerify(unittest2.TestCase): def setUp(self): + self.skipTest("WIP") + prepare_zpools() #create actual test files and data diff --git a/zfs_autobackup/ZfsAutoverify.py b/zfs_autobackup/ZfsAutoverify.py index 55327ef..37f75c7 100644 --- a/zfs_autobackup/ZfsAutoverify.py +++ b/zfs_autobackup/ZfsAutoverify.py @@ -1,80 +1,67 @@ -import os -import time - -from .ExecuteNode import ExecuteNode +# from util import activate_volume_snapshot, create_mountpoints, cleanup_mountpoint from .ZfsAuto import ZfsAuto -from .ZfsDataset import ZfsDataset from .ZfsNode import ZfsNode import sys -import platform - -def tmp_name(suffix=""): - """create temporary name unique to this process and node""" - - #we could use uuids but those are ugly and confusing - name="zfstmp_{}_{}".format(platform.node(), os.getpid()) - name=name+suffix - return name -# try to be as unix compatible as possible, while still having decent performance -def compare_trees_find(source_node, source_path, target_node, target_path): - # find /tmp/zfstmp_pve1_1993135target/ -xdev -type f -print0 | xargs -0 md5sum | md5sum -c - - #verify tree has atleast one file - - stdout=source_node.run(["find", ".", "-type", "f", - ExecuteNode.PIPE, "head", "-n1", - ], cwd=source_path) - - if not stdout: - source_node.debug("No files, skipping check") - else: - pipe=source_node.run(["find", ".", "-type", "f", "-print0", - ExecuteNode.PIPE, "xargs", "-0", "md5sum" - ], pipe=True, cwd=source_path) - stdout=target_node.run([ "md5sum", "-c", "--quiet"], inp=pipe, cwd=target_path, valid_exitcodes=[0,1]) - - if len(stdout): - for line in stdout: - target_node.error("md5sum: "+line) - - raise(Exception("Some files have checksum errors")) - - -def compare_trees_rsync(source_node, source_path, target_node, target_path): - """use rsync to compare two trees. - Advantage is that we can see which individual files differ. - But requires rsync and cant do remote to remote.""" - - cmd = ["rsync", "-rcnq", "--info=COPY,DEL,MISC,NAME,SYMSAFE", "--msgs2stderr", "--delete" ] - - #local - if source_node.ssh_to is None and target_node.ssh_to is None: - cmd.append("{}/".format(source_path)) - cmd.append("{}/".format(target_path)) - source_node.debug("Running rsync locally, on source.") - stdout, stderr = source_node.run(cmd, return_stderr=True) - - #source is local - elif source_node.ssh_to is None and target_node.ssh_to is not None: - cmd.append("{}/".format(source_path)) - cmd.append("{}:{}/".format(target_node.ssh_to, target_path)) - source_node.debug("Running rsync locally, on source.") - stdout, stderr = source_node.run(cmd, return_stderr=True) - - #target is local - elif source_node.ssh_to is not None and target_node.ssh_to is None: - cmd.append("{}:{}/".format(source_node.ssh_to, source_path)) - cmd.append("{}/".format(target_path)) - source_node.debug("Running rsync locally, on target.") - stdout, stderr=target_node.run(cmd, return_stderr=True) - - else: - raise Exception("Source and target cant both be remote when verifying. (rsync limitation)") - - if stderr: - raise Exception("Dataset verify failed, see above list for differences") +# # try to be as unix compatible as possible, while still having decent performance +# def compare_trees_find(source_node, source_path, target_node, target_path): +# # find /tmp/zfstmp_pve1_1993135target/ -xdev -type f -print0 | xargs -0 md5sum | md5sum -c +# +# #verify tree has atleast one file +# +# stdout=source_node.run(["find", ".", "-type", "f", +# ExecuteNode.PIPE, "head", "-n1", +# ], cwd=source_path) +# +# if not stdout: +# source_node.debug("No files, skipping check") +# else: +# pipe=source_node.run(["find", ".", "-type", "f", "-print0", +# ExecuteNode.PIPE, "xargs", "-0", "md5sum" +# ], pipe=True, cwd=source_path) +# stdout=target_node.run([ "md5sum", "-c", "--quiet"], inp=pipe, cwd=target_path, valid_exitcodes=[0,1]) +# +# if len(stdout): +# for line in stdout: +# target_node.error("md5sum: "+line) +# +# raise(Exception("Some files have checksum errors")) +# +# +# def compare_trees_rsync(source_node, source_path, target_node, target_path): +# """use rsync to compare two trees. +# Advantage is that we can see which individual files differ. +# But requires rsync and cant do remote to remote.""" +# +# cmd = ["rsync", "-rcnq", "--info=COPY,DEL,MISC,NAME,SYMSAFE", "--msgs2stderr", "--delete" ] +# +# #local +# if source_node.ssh_to is None and target_node.ssh_to is None: +# cmd.append("{}/".format(source_path)) +# cmd.append("{}/".format(target_path)) +# source_node.debug("Running rsync locally, on source.") +# stdout, stderr = source_node.run(cmd, return_stderr=True) +# +# #source is local +# elif source_node.ssh_to is None and target_node.ssh_to is not None: +# cmd.append("{}/".format(source_path)) +# cmd.append("{}:{}/".format(target_node.ssh_to, target_path)) +# source_node.debug("Running rsync locally, on source.") +# stdout, stderr = source_node.run(cmd, return_stderr=True) +# +# #target is local +# elif source_node.ssh_to is not None and target_node.ssh_to is None: +# cmd.append("{}:{}/".format(source_node.ssh_to, source_path)) +# cmd.append("{}/".format(target_path)) +# source_node.debug("Running rsync locally, on target.") +# stdout, stderr=target_node.run(cmd, return_stderr=True) +# +# else: +# raise Exception("Source and target cant both be remote when verifying. (rsync limitation)") +# +# if stderr: +# raise Exception("Dataset verify failed, see above list for differences") def verify_filesystem(source_snapshot, source_mnt, target_snapshot, target_mnt, method): @@ -88,8 +75,8 @@ def verify_filesystem(source_snapshot, source_mnt, target_snapshot, target_mnt, if method=='rsync': compare_trees_rsync(source_snapshot.zfs_node, source_mnt, target_snapshot.zfs_node, target_mnt) - elif method == 'tar': - compare_trees_tar(source_snapshot.zfs_node, source_mnt, target_snapshot.zfs_node, target_mnt) + # elif method == 'tar': + # compare_trees_tar(source_snapshot.zfs_node, source_mnt, target_snapshot.zfs_node, target_mnt) elif method == 'find': compare_trees_find(source_snapshot.zfs_node, source_mnt, target_snapshot.zfs_node, target_mnt) else: @@ -100,102 +87,54 @@ def verify_filesystem(source_snapshot, source_mnt, target_snapshot, target_mnt, target_snapshot.unmount() -def hash_dev(node, dev): - """calculate md5sum of a device on a node""" - - node.debug("Hashing volume {} ".format(dev)) - - cmd = [ "md5sum", dev ] - - stdout = node.run(cmd) - - if node.readonly: - hashed=None - else: - hashed = stdout[0].split(" ")[0] - - node.debug("Hash of volume {} is {}".format(dev, hashed)) - - return hashed +# def hash_dev(node, dev): +# """calculate md5sum of a device on a node""" +# +# node.debug("Hashing volume {} ".format(dev)) +# +# cmd = [ "md5sum", dev ] +# +# stdout = node.run(cmd) +# +# if node.readonly: +# hashed=None +# else: +# hashed = stdout[0].split(" ")[0] +# +# node.debug("Hash of volume {} is {}".format(dev, hashed)) +# +# return hashed -#NOTE: https://www.google.com/search?q=Mount+Path+Limit+freebsd -#Freebsd has limitations regarding path length, so we cant use the above method. -#Instead we create a temporary clone -def get_tmp_clone_name(snapshot): - pool=snapshot.zfs_node.get_pool(snapshot) - return pool.name+"/"+tmp_name() - -def activate_volume_snapshot(snapshot): - """clone volume, waits and tries to findout /dev path to the volume, in a compatible way. (linux/freebsd/smartos)""" - - clone_name=get_tmp_clone_name(snapshot) - clone=snapshot.clone(clone_name) - - #NOTE: add smartos location to this list as well - locations=[ - "/dev/zvol/" + clone_name - ] - - clone.debug("Waiting for /dev entry to appear...") - time.sleep(0.1) - - start_time=time.time() - while time.time()-start_time<10: - for location in locations: - stdout, stderr, exit_code=clone.zfs_node.run(["test", "-e", location], return_all=True, valid_exitcodes=[0,1]) - - #fake it in testmode - if clone.zfs_node.readonly: - return location - - if exit_code==0: - return location - time.sleep(1) - - raise(Exception("Timeout while waiting for {} entry to appear.".format(locations))) - -def deacitvate_volume_snapshot(snapshot): - clone_name=get_tmp_clone_name(snapshot) - clone=snapshot.zfs_node.get_dataset(clone_name) - clone.destroy(deferred=True, verbose=False) +# def deacitvate_volume_snapshot(snapshot): +# clone_name=get_tmp_clone_name(snapshot) +# clone=snapshot.zfs_node.get_dataset(clone_name) +# clone.destroy(deferred=True, verbose=False) def verify_volume(source_dataset, source_snapshot, target_dataset, target_snapshot): """compare the contents of two zfs volume snapshots""" - try: - source_dev= activate_volume_snapshot(source_snapshot) - target_dev= activate_volume_snapshot(target_snapshot) + # try: + source_dev= activate_volume_snapshot(source_snapshot) + target_dev= activate_volume_snapshot(target_snapshot) - source_hash= hash_dev(source_snapshot.zfs_node, source_dev) - target_hash= hash_dev(target_snapshot.zfs_node, target_dev) + source_hash= hash_dev(source_snapshot.zfs_node, source_dev) + target_hash= hash_dev(target_snapshot.zfs_node, target_dev) - if source_hash!=target_hash: - raise Exception("md5hash difference: {} != {}".format(source_hash, target_hash)) + if source_hash!=target_hash: + raise Exception("md5hash difference: {} != {}".format(source_hash, target_hash)) - finally: - deacitvate_volume_snapshot(source_snapshot) - deacitvate_volume_snapshot(target_snapshot) - -def create_mountpoints(source_node, target_node): - - # prepare mount points - source_node.debug("Create temporary mount point") - source_mnt = "/tmp/"+tmp_name("source") - source_node.run(["mkdir", source_mnt]) - - target_node.debug("Create temporary mount point") - target_mnt = "/tmp/"+tmp_name("target") - target_node.run(["mkdir", target_mnt]) - - return source_mnt, target_mnt + # finally: + # deacitvate_volume_snapshot(source_snapshot) + # deacitvate_volume_snapshot(target_snapshot) -def cleanup_mountpoint(node, mnt): - node.debug("Cleaning up temporary mount point") - node.run([ "rmdir", mnt ], hide_errors=True, valid_exitcodes=[] ) - +# class ZfsAutoChecksumVolume(ZfsAuto): +# def __init__(self, argv, print_arguments=True): +# +# # NOTE: common options and parameters are in ZfsAuto +# super(ZfsAutoverify, self).__init__(argv, print_arguments) class ZfsAutoverify(ZfsAuto): """The zfs-autoverify class, default agruments and stuff come from ZfsAuto"""