Fixes #225 zfs-check: efficient handling of sparse files
BlockHasher.py
* hash_class is sourced from cli args instead of hardcoding it.
* hash_factory() lays the groundwork to support arbitrary hash libs.
* Detection of and use of xxhash lib.

ZfsCheck.py
* Implement new cli arg --hash. The choices for the arg are generated based on what is detected in the python env.
* The input to --hash is validated against the arg choices.
* Implemented helper method determine_algorithms_available(). This tries to pick a performant default with a fallback to sha1.
* Detection of and use of xxhash lib.
This commit is contained in:
parent 7122dc92af
commit bf2300beb9
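In practice the change means BlockHasher is now configured with an algorithm name (a string arriving via the new --hash argument) instead of a hardcoded hashlib class. A minimal sketch of the call-site difference, with the constructor signature taken from the diff below; the import path is an assumption based on the package layout:

```python
import hashlib

# assumed import path, based on the zfs_autobackup package layout
from zfs_autobackup.BlockHasher import BlockHasher

# before this commit: a concrete hashlib class was baked into the default
old_style = BlockHasher(count=10000, bs=4096, hash_class=hashlib.sha1)

# after this commit: the algorithm is a plain string (e.g. from --hash),
# resolved at hashing time by hash_factory() against hashlib or xxhash
new_style = BlockHasher(count=10000, bs=4096, hash_class='xxh3_64')
```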
BlockHasher.py

@@ -1,6 +1,11 @@
 import hashlib
 import os
 
+xxhash = None
+try:
+    import xxhash
+except:
+    pass
 
 class BlockHasher():
     """This class was created to checksum huge files and blockdevices (TB's)
@@ -16,7 +21,7 @@ class BlockHasher():
 
     """
 
-    def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1, skip=0):
+    def __init__(self, count=10000, bs=4096, hash_class=None, skip=0):
         self.count = count
         self.bs = bs
         self.chunk_size=bs*count
@@ -28,6 +33,11 @@ class BlockHasher():
 
         self.stats_total_bytes=0
 
+    def hash_factory(self):
+        if self.hash_class in hashlib.algorithms_available:
+            return hashlib.new(self.hash_class)
+        if self.hash_class.startswith('xxh'):
+            return getattr(xxhash, self.hash_class)()
 
     def _seek_next_chunk(self, fh, fsize):
         """seek fh to next chunk and update skip counter.
@@ -80,7 +90,7 @@ class BlockHasher():
                 return
 
             #read chunk
-            hash = self.hash_class()
+            hash = self.hash_factory()
             block_nr = 0
             while block_nr != self.count:
                 block=fh.read(self.bs)
@@ -105,7 +115,7 @@ class BlockHasher():
                 try:
 
                     checked = checked + 1
-                    hash = self.hash_class()
+                    hash = self.hash_factory()
                     f.seek(int(chunk_nr) * self.bs * self.count)
                     block_nr = 0
                     for block in iter(lambda: f.read(self.bs), b""):
@@ -124,4 +134,4 @@ class BlockHasher():
                     yield ( chunk_nr , hexdigest, 'ERROR: '+str(e))
 
         except Exception as e:
-            yield ( '-', '-', 'ERROR: '+ str(e))
+            yield ( '-', '-', 'ERROR: '+ str(e))
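For reference, the dispatch that hash_factory() introduces can be exercised standalone. This sketch copies the guarded import and the two lookup branches from the diff above, adding only an ImportError qualifier and an explicit error for unknown names (the diff's version would fall through and return None in that case):

```python
import hashlib

# optional dependency, guarded the same way as in BlockHasher.py
xxhash = None
try:
    import xxhash
except ImportError:
    pass

def hash_factory(hash_class):
    """Return a fresh hasher object for the given algorithm name."""
    if hash_class in hashlib.algorithms_available:
        return hashlib.new(hash_class)            # e.g. 'sha1', 'sha256'
    if xxhash is not None and hash_class.startswith('xxh'):
        return getattr(xxhash, hash_class)()      # e.g. xxhash.xxh3_64()
    raise ValueError("unknown hash algorithm: %s" % hash_class)

h = hash_factory('sha1')
h.update(b"a 4096-byte block would go here")
print(h.hexdigest())
```

Because both call sites create a new hasher per chunk, swapping hash_class() for hash_factory() leaves the per-chunk hashing semantics unchanged.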
ZfsCheck.py

@@ -10,6 +10,13 @@ from .ZfsNode import ZfsNode
 from .util import *
 from .CliBase import CliBase
 
+from hashlib import algorithms_available
+from copy import copy
+xxhash = None
+try:
+    import xxhash
+except:
+    pass
 
 class ZfsCheck(CliBase):
 
@@ -20,7 +27,17 @@ class ZfsCheck(CliBase):
 
         self.node = ZfsNode(self.log, utc=self.args.utc, readonly=self.args.test, debug_output=self.args.debug_output)
 
-        self.block_hasher = BlockHasher(count=self.args.count, bs=self.args.block_size, skip=self.args.skip)
+        self.block_hasher = BlockHasher(count=self.args.count, bs=self.args.block_size, skip=self.args.skip, hash_class=self.args.hash)
+
+    def determine_algorithms_available(self):
+        self.algorithms_available = copy(algorithms_available)
+
+        if None != xxhash:
+            for value in ( 'xxh128', 'xxh32', 'xxh3_128', 'xxh3_64', 'xxh64' ):
+                self.algorithms_available.add(value)
+            self.hash_default = 'xxh3_64'
+        else:
+            self.hash_default = 'sha1'
 
     def get_parser(self):
 
@@ -42,6 +59,9 @@ class ZfsCheck(CliBase):
         group.add_argument('--skip', '-s', metavar="NUMBER", default=0, type=int,
                            help="Skip this number of chunks after every hash. %(default)s")
 
+        self.determine_algorithms_available()
+        group.add_argument('--hash', default=self.hash_default,
+                           help="Specify the hashing algorithm to use", choices=sorted([item for item in self.algorithms_available]))
         return parser
 
     def parse_args(self, argv):