From a115f0bd17e20ecb758c6473700f48791bebfecc Mon Sep 17 00:00:00 2001 From: Edwin Eefting Date: Sun, 20 Feb 2022 17:30:02 +0100 Subject: [PATCH] zfs check initial version (wip) --- zfs_autobackup/ZfsCheck.py | 77 ++++++++++++++++++++++++++++++++++---- zfs_autobackup/ZfsNode.py | 8 ++-- zfs_autobackup/util.py | 72 +++++++++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 12 deletions(-) diff --git a/zfs_autobackup/ZfsCheck.py b/zfs_autobackup/ZfsCheck.py index 5fab948..3f4264f 100644 --- a/zfs_autobackup/ZfsCheck.py +++ b/zfs_autobackup/ZfsCheck.py @@ -1,11 +1,9 @@ import hashlib -from .util import block_hash +from .ZfsNode import ZfsNode +from .util import * from .CliBase import CliBase - - - class ZfsCheck(CliBase): def __init__(self, argv, print_arguments=True): @@ -13,15 +11,78 @@ class ZfsCheck(CliBase): # NOTE: common options and parameters are in ZfsAuto super(ZfsCheck, self).__init__(argv, print_arguments) + self.node=ZfsNode(self.log, readonly=self.args.test, debug_output=self.args.debug_output) + + + def get_parser(self): + + parser=super(ZfsCheck, self).get_parser() + + #positional arguments + parser.add_argument('snapshot', metavar='SNAPSHOT', default=None, nargs='?', + help='Snapshot to checksum') + + + group=parser.add_argument_group('Hasher options') + + group.add_argument('--block-size', metavar="BYTES", default=4096, help="Read block-size, default %(default)s", type=int) + group.add_argument('--count', metavar="COUNT", default=int((100*(1024**2))/4096), help="Generate a hash for every COUNT blocks. 
default %(default)s", type=int) #100MiB + + return parser + + def parse_args(self, argv): + args=super(ZfsCheck, self).parse_args(argv) + + if args.test: + self.warning("TEST MODE - NOT DOING ANYTHING USEFULL") + self.log.show_debug=True #show at least what we would do + + return args + + def hash_filesystem(self, snapshot): + """ + + :type snapshot: ZfsDataset.ZfsDataset + """ + mnt="/tmp/"+tmp_name() + + try: + self.debug("Create temporary mount point {}".format(mnt)) + self.node.run(["mkdir", mnt]) + + snapshot.mount(mnt) + + self.debug("Hashing tree: {}".format(mnt)) + if not self.args.test: + for (file, block, hash) in block_hash_tree(mnt): + print("{}\t{}\t{}".format(file, block, hash)) + + finally: + self.debug("Cleaning up temporary mount point") + snapshot.unmount() + self.node.run(["rmdir", mnt], hide_errors=True, valid_exitcodes=[]) + + def run(self): + snapshot=self.node.get_dataset(self.args.snapshot) + if not snapshot.exists: + snapshot.error("Dataset not found") + sys.exit(1) - # print(sha1sum("/home/psy/Downloads/carimage.zip")) - for (block, h ) in block_hash("/home/psy/Downloads/carimage.zip" , count=10000): - print(block) - print (h) + if not snapshot.is_snapshot: + snapshot.error("Dataset should be a snapshot") + sys.exit(1) + dataset_type=snapshot.parent.properties['type'] + + if dataset_type=='volume': + self.checksum_volume(snapshot) + elif dataset_type=='filesystem': + self.hash_filesystem(snapshot) + else: + raise Exception("huh?") pass diff --git a/zfs_autobackup/ZfsNode.py b/zfs_autobackup/ZfsNode.py index a9fdd60..1e0b55d 100644 --- a/zfs_autobackup/ZfsNode.py +++ b/zfs_autobackup/ZfsNode.py @@ -17,7 +17,7 @@ from .ExecuteNode import ExecuteError class ZfsNode(ExecuteNode): """a node that contains zfs datasets. 
implements global (systemwide/pool wide) zfs commands""" - def __init__(self, snapshot_time_format, hold_name, logger, ssh_config=None, ssh_to=None, readonly=False, + def __init__(self, logger, snapshot_time_format="", hold_name="", ssh_config=None, ssh_to=None, readonly=False, description="", debug_output=False, thinner=None): @@ -32,9 +32,9 @@ class ZfsNode(ExecuteNode): self.verbose("Using custom SSH config: {}".format(ssh_config)) if ssh_to: - self.verbose("Datasets on: {}".format(ssh_to)) - else: - self.verbose("Datasets are local") + self.verbose("SSH to: {}".format(ssh_to)) + # else: + # self.verbose("Datasets are local") if thinner is not None: rules = thinner.human_rules() diff --git a/zfs_autobackup/util.py b/zfs_autobackup/util.py index acd8e99..e18c8fc 100644 --- a/zfs_autobackup/util.py +++ b/zfs_autobackup/util.py @@ -15,11 +15,21 @@ import hashlib # sys 0m0.462s # NOTE: surprisingly sha1 in via python3 is faster than the native sha1sum utility, even in the way we use below! +import os +import platform +import sys +import time + +import pathlib as pathlib + + def block_hash(fname, count=10000, bs=4006): """yields sha1 hash per count blocks. yields(chunk_nr, hexdigest) yields nothing for empty files. 
+ + This function was created to checksum huge files and blockdevices (TB's) """ with open(fname, "rb") as f: @@ -37,3 +47,65 @@ def block_hash(fname, count=10000, bs=4006): # yield last (incomplete) block if block_nr % count != 0: yield (chunk_nr, hash.hexdigest()) + +def block_hash_tree(start_path, count=10000, bs=4096): + """block_hash every file in a tree, yielding results""" + + os.chdir(start_path) + + for f in pathlib.Path('.').glob('**/*'): + if f.is_file() and not f.is_symlink(): + for (chunk_nr, hash) in block_hash(f, count, bs): + + yield ( f, chunk_nr, hash) + + +def tmp_name(suffix=""): + """create temporary name unique to this process and node""" + + #we could use uuids but those are ugly and confusing + name="{}_{}_{}".format( + os.path.basename(sys.argv[0]), + platform.node(), + os.getpid()) + name=name+suffix + return name + + +def get_tmp_clone_name(snapshot): + pool=snapshot.zfs_node.get_pool(snapshot) + return pool.name+"/"+tmp_name() + + +#NOTE: https://www.google.com/search?q=Mount+Path+Limit+freebsd +#FreeBSD has limitations regarding path length, so we have to clone it so the path stays short +def activate_volume_snapshot(snapshot): + """clone volume, waits and tries to find out /dev path to the volume, in a compatible way. (linux/freebsd/smartos)""" + + clone_name= get_tmp_clone_name(snapshot) + clone=snapshot.clone(clone_name) + + #NOTE: add smartos location to this list as well + locations=[ + "/dev/zvol/" + clone_name + ] + + clone.debug("Waiting for /dev entry to appear...") + time.sleep(0.1) + + start_time=time.time() + while time.time()-start_time<10: + for location in locations: + stdout, stderr, exit_code=clone.zfs_node.run(["test", "-e", location], return_all=True, valid_exitcodes=[0,1]) + + #fake it in testmode + if clone.zfs_node.readonly: + return location + + if exit_code==0: + return location + time.sleep(1) + + raise(Exception("Timeout while waiting for {} entry to appear.".format(locations))) + 