zfs check initial version (wip)

This commit is contained in:
Edwin Eefting 2022-02-20 17:30:02 +01:00
parent 626c84fe47
commit a115f0bd17
3 changed files with 145 additions and 12 deletions

zfs_autobackup/ZfsCheck.py

@@ -1,11 +1,9 @@
import hashlib
from .util import block_hash
from .ZfsNode import ZfsNode
from .util import *
from .CliBase import CliBase
class ZfsCheck(CliBase):
def __init__(self, argv, print_arguments=True):
@@ -13,15 +11,78 @@ class ZfsCheck(CliBase):
# NOTE: common options and parameters are in ZfsAuto
super(ZfsCheck, self).__init__(argv, print_arguments)
self.node=ZfsNode(self.log, readonly=self.args.test, debug_output=self.args.debug_output)
def get_parser(self):
parser=super(ZfsCheck, self).get_parser()
#positional arguments
parser.add_argument('snapshot', metavar='SNAPSHOT', default=None, nargs='?',
help='Snapshot to checksum')
group=parser.add_argument_group('Hasher options')
group.add_argument('--block-size', metavar="BYTES", default=4096, help="Read block-size, default %(default)s", type=int)
group.add_argument('--count', metavar="COUNT", default=int((100*(1024**2))/4096), help="Generate a hash for every COUNT blocks. default %(default)s", type=int) #100MiB
return parser
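(With these defaults, one hash covers COUNT * BYTES of data; the arithmetic behind the #100MiB comment above, shown for clarity:)

    >>> 25600 * 4096        # default --count * default --block-size
    104857600               # = 100 * 1024**2, i.e. 100 MiB per sha1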
def parse_args(self, argv):
args=super(ZfsCheck, self).parse_args(argv)
if args.test:
self.warning("TEST MODE - NOT DOING ANYTHING USEFULL")
self.log.show_debug=True #show at least what we would do
return args
def hash_filesystem(self, snapshot):
"""
:type snapshot: ZfsDataset.ZfsDataset
"""
mnt="/tmp/"+tmp_name()
try:
self.debug("Create temporary mount point {}".format(mnt))
self.node.run(["mkdir", mnt])
snapshot.mount(mnt)
self.debug("Hashing tree: {}".format(mnt))
if not self.args.test:
for (file, block, hash) in block_hash_tree(mnt):
print("{}\t{}\t{}".format(file, block, hash))
finally:
self.debug("Cleaning up temporary mount point")
snapshot.unmount()
self.node.run(["rmdir", mnt], hide_errors=True, valid_exitcodes=[])
def run(self):
snapshot=self.node.get_dataset(self.args.snapshot)
if not snapshot.exists:
snapshot.error("Dataset not found")
sys.exit(1)
# debug leftovers, disabled: hardcoded local path would crash run() elsewhere
# print(sha1sum("/home/psy/Downloads/carimage.zip"))
# for (block, h) in block_hash("/home/psy/Downloads/carimage.zip", count=10000):
#     print(block)
#     print(h)
if not snapshot.is_snapshot:
snapshot.error("Dataset should be a snapshot")
sys.exit(1)
dataset_type=snapshot.parent.properties['type']
if dataset_type=='volume':
self.checksum_volume(snapshot)
elif dataset_type=='filesystem':
self.hash_filesystem(snapshot)
else:
raise Exception("Unknown dataset type: {}".format(dataset_type))
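A usage sketch for the new subcommand. The tool is driven through CliBase(argv), so (assuming the package layout, which this diff does not show) a filesystem snapshot could be checked along these lines:

    # hedged sketch: ZfsCheck takes its argv like the other CliBase tools
    from zfs_autobackup.ZfsCheck import ZfsCheck
    ZfsCheck(["rpool/data@backup-20220220"]).run()

Two hosts hashing the same replicated snapshot can then simply diff the printed lines.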

zfs_autobackup/ZfsNode.py

@@ -17,7 +17,7 @@ from .ExecuteNode import ExecuteError
class ZfsNode(ExecuteNode):
"""a node that contains zfs datasets. implements global (systemwide/pool wide) zfs commands"""
-def __init__(self, snapshot_time_format, hold_name, logger, ssh_config=None, ssh_to=None, readonly=False,
+def __init__(self, logger, snapshot_time_format="", hold_name="", ssh_config=None, ssh_to=None, readonly=False,
description="",
debug_output=False, thinner=None):
@@ -32,9 +32,9 @@ class ZfsNode(ExecuteNode):
self.verbose("Using custom SSH config: {}".format(ssh_config))
if ssh_to:
self.verbose("Datasets on: {}".format(ssh_to))
else:
self.verbose("Datasets are local")
self.verbose("SSH to: {}".format(ssh_to))
# else:
# self.verbose("Datasets are local")
if thinner is not None:
rules = thinner.human_rules()

zfs_autobackup/util.py

@@ -15,11 +15,21 @@ import hashlib
# sys 0m0.462s
# NOTE: surprisingly, sha1 via python3 is faster than the native sha1sum utility, even in the way we use it below!
import os
import platform
import sys
import time
import pathlib
def block_hash(fname, count=10000, bs=4096):
"""yields a sha1 hash per count blocks.
yields (chunk_nr, hexdigest) tuples.
yields nothing for empty files.
This function was created to checksum huge files and block devices (TBs).
"""
with open(fname, "rb") as f:
@@ -37,3 +47,65 @@ def block_hash(fname, count=10000, bs=4096):
# yield last (incomplete) block
if block_nr % count != 0:
yield (chunk_nr, hash.hexdigest())
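A usage sketch for block_hash(); the device path is illustrative:

    # one sha1 per 25600 blocks of 4096 bytes (100 MiB)
    for (chunk_nr, digest) in block_hash("/dev/zvol/rpool/vol1", count=25600, bs=4096):
        print("{}\t{}".format(chunk_nr, digest))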
def block_hash_tree(start_path, count=10000, bs=4096):
"""block_hash every file in a tree, yielding results"""
os.chdir(start_path)
for f in pathlib.Path('.').glob('**/*'):
if f.is_file() and not f.is_symlink():
for (chunk_nr, hash) in block_hash(f, count, bs):
yield ( f, chunk_nr, hash)
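And for a whole tree; note that block_hash_tree() chdir()s into start_path, so the yielded paths are relative to it:

    for (fname, chunk_nr, digest) in block_hash_tree("/mnt/some_snapshot"):
        print("{}\t{}\t{}".format(fname, chunk_nr, digest))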
def tmp_name(suffix=""):
"""create temporary name unique to this process and node"""
#we could use uuids but those are ugly and confusing
name="{}_{}_{}".format(
os.path.basename(sys.argv[0]),
platform.node(),
os.getpid())
name=name+suffix
return name
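For example (illustrative values: script basename, hostname, pid):

    >>> tmp_name("_clone")
    'zfs-check_backupserver_12345_clone'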
def get_tmp_clone_name(snapshot):
pool=snapshot.zfs_node.get_pool(snapshot)
return pool.name+"/"+tmp_name()
#NOTE: https://www.google.com/search?q=Mount+Path+Limit+freebsd
#FreeBSD has limitations regarding path length, so we have to clone it so the path stays short.
def activate_volume_snapshot(snapshot):
"""clone volume, waits and tries to findout /dev path to the volume, in a compatible way. (linux/freebsd/smartos)"""
clone_name= get_tmp_clone_name(snapshot)
clone=snapshot.clone(clone_name)
#NOTE: add smartos location to this list as well
locations=[
"/dev/zvol/" + clone_name
]
clone.debug("Waiting for /dev entry to appear...")
time.sleep(0.1)
start_time=time.time()
while time.time()-start_time<10:
for location in locations:
stdout, stderr, exit_code=clone.zfs_node.run(["test", "-e", location], return_all=True, valid_exitcodes=[0,1])
#fake it in testmode
if clone.zfs_node.readonly:
return location
if exit_code==0:
return location
time.sleep(1)
raise Exception("Timeout while waiting for {} entry to appear.".format(locations))
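run() dispatches volumes to checksum_volume(), which this WIP diff does not yet include. A sketch of what it could look like, built only from the helpers above and written as a free function (the cleanup call is an assumption):

    # hypothetical sketch of ZfsCheck.checksum_volume()
    def checksum_volume(snapshot, count=25600, bs=4096):
        dev = activate_volume_snapshot(snapshot)  # clone + wait for /dev node
        try:
            for (chunk_nr, digest) in block_hash(dev, count, bs):
                print("{}\t{}".format(chunk_nr, digest))
        finally:
            # assumption: tmp_name() is stable within one process, so this
            # resolves to the clone created by activate_volume_snapshot()
            snapshot.zfs_node.run(["zfs", "destroy", get_tmp_clone_name(snapshot)])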