zfs check initial version (wip)

This commit is contained in:
Edwin Eefting 2022-02-20 17:30:02 +01:00
parent 626c84fe47
commit a115f0bd17
3 changed files with 145 additions and 12 deletions

zfs_autobackup/ZfsCheck.py

@@ -1,11 +1,9 @@
import hashlib
from .util import block_hash
from .ZfsNode import ZfsNode
from .util import *
from .CliBase import CliBase
class ZfsCheck(CliBase):
def __init__(self, argv, print_arguments=True):
@@ -13,15 +11,78 @@ class ZfsCheck(CliBase):
# NOTE: common options and parameters are in ZfsAuto
super(ZfsCheck, self).__init__(argv, print_arguments)
self.node=ZfsNode(self.log, readonly=self.args.test, debug_output=self.args.debug_output)
def get_parser(self):
parser=super(ZfsCheck, self).get_parser()
#positional arguments
parser.add_argument('snapshot', metavar='SNAPSHOT', default=None, nargs='?',
help='Snapshot to checksum')
group=parser.add_argument_group('Hasher options')
group.add_argument('--block-size', metavar="BYTES", default=4096, help="Read block-size, default %(default)s", type=int)
group.add_argument('--count', metavar="COUNT", default=int((100*(1024**2))/4096), help="Generate a hash for every COUNT blocks. default %(default)s", type=int) #100MiB
return parser
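(With these defaults, one hash covers COUNT * BYTES of data; the arithmetic behind the #100MiB comment above, shown for clarity:)

    >>> 25600 * 4096        # default --count * default --block-size
    104857600               # = 100 * 1024**2, i.e. 100 MiB per sha1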
def parse_args(self, argv):
args=super(ZfsCheck, self).parse_args(argv)
if args.test:
self.warning("TEST MODE - NOT DOING ANYTHING USEFULL")
self.log.show_debug=True #show at least what we would do
return args
def hash_filesystem(self, snapshot):
"""
:type snapshot: ZfsDataset.ZfsDataset
"""
mnt="/tmp/"+tmp_name()
try:
self.debug("Create temporary mount point {}".format(mnt))
self.node.run(["mkdir", mnt])
snapshot.mount(mnt)
self.debug("Hashing tree: {}".format(mnt))
if not self.args.test:
for (file, block, hash) in block_hash_tree(mnt):
print("{}\t{}\t{}".format(file, block, hash))
finally:
self.debug("Cleaning up temporary mount point")
snapshot.unmount()
self.node.run(["rmdir", mnt], hide_errors=True, valid_exitcodes=[])
def run(self):
snapshot=self.node.get_dataset(self.args.snapshot)
if not snapshot.exists:
snapshot.error("Dataset not found")
sys.exit(1)
# debug leftovers, disabled: hardcoded local path would crash run() elsewhere
# print(sha1sum("/home/psy/Downloads/carimage.zip"))
# for (block, h) in block_hash("/home/psy/Downloads/carimage.zip", count=10000):
#     print(block)
#     print(h)
if not snapshot.is_snapshot:
snapshot.error("Dataset should be a snapshot")
sys.exit(1)
dataset_type=snapshot.parent.properties['type']
if dataset_type=='volume':
self.checksum_volume(snapshot)
elif dataset_type=='filesystem':
self.hash_filesystem(snapshot)
else:
raise Exception("Unknown dataset type: {}".format(dataset_type))
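A usage sketch for the new subcommand. The tool is driven through CliBase(argv), so (assuming the package layout, which this diff does not show) a filesystem snapshot could be checked along these lines:

    # hedged sketch: ZfsCheck takes its argv like the other CliBase tools
    from zfs_autobackup.ZfsCheck import ZfsCheck
    ZfsCheck(["rpool/data@backup-20220220"]).run()

Two hosts hashing the same replicated snapshot can then simply diff the printed lines.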

zfs_autobackup/ZfsNode.py

@@ -17,7 +17,7 @@ from .ExecuteNode import ExecuteError
class ZfsNode(ExecuteNode):
"""a node that contains zfs datasets. implements global (systemwide/pool wide) zfs commands"""
-def __init__(self, snapshot_time_format, hold_name, logger, ssh_config=None, ssh_to=None, readonly=False,
+def __init__(self, logger, snapshot_time_format="", hold_name="", ssh_config=None, ssh_to=None, readonly=False,
description="",
debug_output=False, thinner=None):
@@ -32,9 +32,9 @@ class ZfsNode(ExecuteNode):
self.verbose("Using custom SSH config: {}".format(ssh_config))
if ssh_to:
self.verbose("Datasets on: {}".format(ssh_to))
else:
self.verbose("Datasets are local")
self.verbose("SSH to: {}".format(ssh_to))
# else:
# self.verbose("Datasets are local")
if thinner is not None:
rules = thinner.human_rules()

zfs_autobackup/util.py

@@ -15,11 +15,21 @@ import hashlib
# sys 0m0.462s
# NOTE: surprisingly, sha1 via python3 is faster than the native sha1sum utility, even in the way we use it below!
import os
import platform
import sys
import time
import pathlib
def block_hash(fname, count=10000, bs=4096):
"""yields a sha1 hash per count blocks.
yields (chunk_nr, hexdigest) tuples.
yields nothing for empty files.
This function was created to checksum huge files and block devices (TBs).
"""
with open(fname, "rb") as f:
@@ -37,3 +47,65 @@ def block_hash(fname, count=10000, bs=4096):
# yield last (incomplete) block
if block_nr % count != 0:
yield (chunk_nr, hash.hexdigest())
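A usage sketch for block_hash(); the device path is illustrative:

    # one sha1 per 25600 blocks of 4096 bytes (100 MiB)
    for (chunk_nr, digest) in block_hash("/dev/zvol/rpool/vol1", count=25600, bs=4096):
        print("{}\t{}".format(chunk_nr, digest))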
def block_hash_tree(start_path, count=10000, bs=4096):
"""block_hash every file in a tree, yielding results"""
os.chdir(start_path)
for f in pathlib.Path('.').glob('**/*'):
if f.is_file() and not f.is_symlink():
for (chunk_nr, hash) in block_hash(f, count, bs):
yield ( f, chunk_nr, hash)
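And for a whole tree; note that block_hash_tree() chdir()s into start_path, so the yielded paths are relative to it:

    for (fname, chunk_nr, digest) in block_hash_tree("/mnt/some_snapshot"):
        print("{}\t{}\t{}".format(fname, chunk_nr, digest))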
def tmp_name(suffix=""):
"""create temporary name unique to this process and node"""
#we could use uuids but those are ugly and confusing
name="{}_{}_{}".format(
os.path.basename(sys.argv[0]),
platform.node(),
os.getpid())
name=name+suffix
return name
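For example (illustrative values: script basename, hostname, pid):

    >>> tmp_name("_clone")
    'zfs-check_backupserver_12345_clone'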
def get_tmp_clone_name(snapshot):
pool=snapshot.zfs_node.get_pool(snapshot)
return pool.name+"/"+tmp_name()
#NOTE: https://www.google.com/search?q=Mount+Path+Limit+freebsd
#FreeBSD has limitations regarding path length, so we have to clone it so the path stays short.
def activate_volume_snapshot(snapshot):
"""clone volume, waits and tries to findout /dev path to the volume, in a compatible way. (linux/freebsd/smartos)"""
clone_name= get_tmp_clone_name(snapshot)
clone=snapshot.clone(clone_name)
#NOTE: add smartos location to this list as well
locations=[
"/dev/zvol/" + clone_name
]
clone.debug("Waiting for /dev entry to appear...")
time.sleep(0.1)
start_time=time.time()
while time.time()-start_time<10:
for location in locations:
stdout, stderr, exit_code=clone.zfs_node.run(["test", "-e", location], return_all=True, valid_exitcodes=[0,1])
#fake it in testmode
if clone.zfs_node.readonly:
return location
if exit_code==0:
return location
time.sleep(1)
raise Exception("Timeout while waiting for {} entry to appear.".format(locations))
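run() dispatches volumes to checksum_volume(), which this WIP diff does not yet include. A sketch of what it could look like, built only from the helpers above and written as a free function (the cleanup call is an assumption):

    # hypothetical sketch of ZfsCheck.checksum_volume()
    def checksum_volume(snapshot, count=25600, bs=4096):
        dev = activate_volume_snapshot(snapshot)  # clone + wait for /dev node
        try:
            for (chunk_nr, digest) in block_hash(dev, count, bs):
                print("{}\t{}".format(chunk_nr, digest))
        finally:
            # assumption: tmp_name() is stable within one process, so this
            # resolves to the clone created by activate_volume_snapshot()
            snapshot.zfs_node.run(["zfs", "destroy", get_tmp_clone_name(snapshot)])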