forked from third-party-mirrors/zfs_autobackup
wip
This commit is contained in:
parent
28ed44b1c8
commit
b68ca19e5f
@ -1,4 +1,6 @@
|
|||||||
import hashlib
|
import hashlib
|
||||||
|
import os
|
||||||
|
from random import random
|
||||||
|
|
||||||
|
|
||||||
class BlockHasher():
|
class BlockHasher():
|
||||||
@ -12,14 +14,45 @@ class BlockHasher():
|
|||||||
Input and output generators are in the format ( chunk_nr, hexdigest )
|
Input and output generators are in the format ( chunk_nr, hexdigest )
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1, coverage=1):
|
def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1, skip=0):
|
||||||
self.count = count
|
self.count = count
|
||||||
self.bs = bs
|
self.bs = bs
|
||||||
|
self.chunk_size=bs*count
|
||||||
self.hash_class = hash_class
|
self.hash_class = hash_class
|
||||||
self.coverage=1
|
|
||||||
|
|
||||||
self.stats_total=0
|
# self.coverage=coverage
|
||||||
self.stats_checked=0
|
self.skip=skip
|
||||||
|
self._skip_count=0
|
||||||
|
|
||||||
|
self.stats_total_bytes=0
|
||||||
|
|
||||||
|
|
||||||
|
def _seek_next_chunk(self, fh, fsize):
|
||||||
|
"""seek fh to next chunk and update skip counter.
|
||||||
|
returns chunk_nr
|
||||||
|
returns False if it should skip the rest of the file"""
|
||||||
|
|
||||||
|
# ignore empty files
|
||||||
|
if fsize==0:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# need to skip chunks?
|
||||||
|
if self._skip_count > 0:
|
||||||
|
chunks_left = ((fsize - fh.tell()) // self.chunk_size) + 1
|
||||||
|
# not enough chunks left in this file?
|
||||||
|
if self._skip_count >= chunks_left:
|
||||||
|
# skip rest of this file
|
||||||
|
self._skip_count = self._skip_count - chunks_left
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
# seek to next chunk, reset skip count
|
||||||
|
fh.seek(self.chunk_size * self._skip_count, os.SEEK_CUR)
|
||||||
|
self._skip_count = self.skip
|
||||||
|
return fh.tell()//self.chunk_size
|
||||||
|
else:
|
||||||
|
# should read this chunk, reset skip count
|
||||||
|
self._skip_count = self.skip
|
||||||
|
return fh.tell() // self.chunk_size
|
||||||
|
|
||||||
def generate(self, fname):
|
def generate(self, fname):
|
||||||
"""Generates checksums
|
"""Generates checksums
|
||||||
@ -28,23 +61,37 @@ class BlockHasher():
|
|||||||
|
|
||||||
yields nothing for empty files.
|
yields nothing for empty files.
|
||||||
"""
|
"""
|
||||||
with open(fname, "rb") as f:
|
with os.open(fname, os.O_RDONLY) as fh:
|
||||||
hash = self.hash_class()
|
print (os.lseek(fh, 0, os.SEEK_END))
|
||||||
block_nr = 0
|
|
||||||
chunk_nr = 0
|
|
||||||
for block in iter(lambda: f.read(self.bs), b""):
|
with os.openopen(fname, "rb") as fh:
|
||||||
hash.update(block)
|
|
||||||
block_nr = block_nr + 1
|
# print(os.path.getsize(fname))
|
||||||
if block_nr % self.count == 0:
|
print(os.lseek(fh, 0, os.SEEK_END))
|
||||||
yield (chunk_nr, hash.hexdigest())
|
|
||||||
chunk_nr = chunk_nr + 1
|
fsize = fh.seek(0, os.SEEK_END)
|
||||||
hash = self.hash_class()
|
fh.seek(0)
|
||||||
|
|
||||||
|
while fh.tell()<fsize:
|
||||||
|
|
||||||
|
chunk_nr=self._seek_next_chunk(fh, fsize)
|
||||||
|
if chunk_nr is False:
|
||||||
|
return
|
||||||
|
|
||||||
|
#read chunk
|
||||||
|
hash = self.hash_class()
|
||||||
|
block_nr = 0
|
||||||
|
while block_nr != self.count:
|
||||||
|
block=fh.read(self.bs)
|
||||||
|
if block==b"":
|
||||||
|
break
|
||||||
|
hash.update(block)
|
||||||
|
block_nr = block_nr + 1
|
||||||
|
|
||||||
# yield last (incomplete) block
|
|
||||||
if block_nr % self.count != 0:
|
|
||||||
yield (chunk_nr, hash.hexdigest())
|
yield (chunk_nr, hash.hexdigest())
|
||||||
|
|
||||||
def compare(self, fname, generator):
|
def compare(self, fname, generator):
|
||||||
"""reads from generator and compares blocks
|
"""reads from generator and compares blocks
|
||||||
Yields mismatches in the form: ( chunk_nr, hexdigest, actual_hexdigest)
|
Yields mismatches in the form: ( chunk_nr, hexdigest, actual_hexdigest)
|
||||||
Yields errors in the form: ( chunk_nr, hexdigest, "message" )
|
Yields errors in the form: ( chunk_nr, hexdigest, "message" )
|
||||||
|
@ -20,10 +20,7 @@ class ZfsCheck(CliBase):
|
|||||||
|
|
||||||
self.node = ZfsNode(self.log, readonly=self.args.test, debug_output=self.args.debug_output)
|
self.node = ZfsNode(self.log, readonly=self.args.test, debug_output=self.args.debug_output)
|
||||||
|
|
||||||
if self.args.check is None:
|
self.block_hasher = BlockHasher(count=self.args.count, bs=self.args.block_size, skip=self.args.skip)
|
||||||
self.block_hasher = BlockHasher(count=self.args.count, bs=self.args.block_size)
|
|
||||||
else:
|
|
||||||
self.block_hasher = BlockHasher(count=self.args.count, bs=self.args.block_size, coverage=self.args.percentage)
|
|
||||||
|
|
||||||
def get_parser(self):
|
def get_parser(self):
|
||||||
|
|
||||||
@ -37,13 +34,13 @@ class ZfsCheck(CliBase):
|
|||||||
group.add_argument('--block-size', metavar="BYTES", default=4096, help="Read block-size, default %(default)s",
|
group.add_argument('--block-size', metavar="BYTES", default=4096, help="Read block-size, default %(default)s",
|
||||||
type=int)
|
type=int)
|
||||||
group.add_argument('--count', metavar="COUNT", default=int((100 * (1024 ** 2)) / 4096),
|
group.add_argument('--count', metavar="COUNT", default=int((100 * (1024 ** 2)) / 4096),
|
||||||
help="Hash chunks of COUNT blocks. Default %(default)s . (Chunk size is BYTES * COUNT) ", type=int) # 100MiB
|
help="Hash chunks of COUNT blocks. Default %(default)s . (CHUNK size is BYTES * COUNT) ", type=int) # 100MiB
|
||||||
|
|
||||||
group.add_argument('--check', '-c', metavar="FILE", default=None, const=True, nargs='?',
|
group.add_argument('--check', '-c', metavar="FILE", default=None, const=True, nargs='?',
|
||||||
help="Read hashes from STDIN (or FILE) and compare them")
|
help="Read hashes from STDIN (or FILE) and compare them")
|
||||||
|
|
||||||
group.add_argument('--percentage', '-p', metavar="NUMBER", default=100, type=float,
|
group.add_argument('--skip', '-s', metavar="NUMBER", default=0, type=float,
|
||||||
help="Generate/compare only this percentage of hashes. Default %(default)s")
|
help="Skip this number of chunks after every hash. %(default)s")
|
||||||
|
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
@ -61,10 +58,9 @@ class ZfsCheck(CliBase):
|
|||||||
self.verbose("Block size : {} bytes".format(args.block_size))
|
self.verbose("Block size : {} bytes".format(args.block_size))
|
||||||
self.verbose("Block count : {}".format(args.count))
|
self.verbose("Block count : {}".format(args.count))
|
||||||
self.verbose("Effective chunk size : {} bytes".format(args.count*args.block_size))
|
self.verbose("Effective chunk size : {} bytes".format(args.count*args.block_size))
|
||||||
self.verbose("Percentage to check : {} %".format(args.percentage))
|
self.verbose("Skip chunk count : {} (checks {:.2f}% of data)".format(args.skip, 100/(1+args.skip)))
|
||||||
self.verbose("")
|
self.verbose("")
|
||||||
|
|
||||||
args.percentage=args.percentage/100
|
|
||||||
|
|
||||||
return args
|
return args
|
||||||
|
|
||||||
@ -216,28 +212,30 @@ class ZfsCheck(CliBase):
|
|||||||
|
|
||||||
last_progress_time = time.time()
|
last_progress_time = time.time()
|
||||||
progress_checked = 0
|
progress_checked = 0
|
||||||
progress_total = 0
|
progress_skipped = 0
|
||||||
|
|
||||||
line=input_fh.readline()
|
line=input_fh.readline()
|
||||||
|
skip=0
|
||||||
while line:
|
while line:
|
||||||
i=line.rstrip().split("\t")
|
i=line.rstrip().split("\t")
|
||||||
#ignores lines without tabs
|
#ignores lines without tabs
|
||||||
if (len(i)>1):
|
if (len(i)>1):
|
||||||
|
|
||||||
if self.args.percentage==1 or self.args.percentage>random():
|
if skip==0:
|
||||||
progress_checked=progress_checked+1
|
progress_checked=progress_checked+1
|
||||||
yield i
|
yield i
|
||||||
|
skip=self.args.skip
|
||||||
progress_total=progress_total+1
|
else:
|
||||||
|
skip=skip-1
|
||||||
|
progress_skipped=progress_skipped+1
|
||||||
|
|
||||||
if self.args.progress and time.time() - last_progress_time > 1:
|
if self.args.progress and time.time() - last_progress_time > 1:
|
||||||
last_progress_time = time.time()
|
last_progress_time = time.time()
|
||||||
self.progress("Checked {}/{} hashes. ({:.2f}% coverage)".format(progress_checked, progress_total, (float(progress_checked)/progress_total)*100))
|
self.progress("Checked {} hashes (skipped {})".format(progress_checked, progress_skipped))
|
||||||
|
|
||||||
line=input_fh.readline()
|
line=input_fh.readline()
|
||||||
|
|
||||||
self.verbose("Checked {}/{} hashes. ({:.2f}% coverage)".format(progress_checked, progress_total, (
|
self.verbose("Checked {} hashes (skipped {})".format(progress_checked, progress_skipped))
|
||||||
float(progress_checked) / progress_total) * 100))
|
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
|
|
||||||
|
@ -0,0 +1,70 @@
|
|||||||
|
import os.path
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from random import random
|
||||||
|
|
||||||
|
with open('test.py', 'rb') as fh:
|
||||||
|
|
||||||
|
# fsize = fh.seek(10000, os.SEEK_END)
|
||||||
|
# print(fsize)
|
||||||
|
|
||||||
|
start=time.time()
|
||||||
|
for i in range(0,1000000):
|
||||||
|
# fh.seek(0, 0)
|
||||||
|
fsize=fh.seek(0, os.SEEK_END)
|
||||||
|
# fsize=fh.tell()
|
||||||
|
# os.path.getsize('test.py')
|
||||||
|
print(time.time()-start)
|
||||||
|
|
||||||
|
|
||||||
|
print(fh.tell())
|
||||||
|
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
checked=1
|
||||||
|
skipped=1
|
||||||
|
coverage=0.1
|
||||||
|
|
||||||
|
max_skip=0
|
||||||
|
|
||||||
|
|
||||||
|
skipinarow=0
|
||||||
|
while True:
|
||||||
|
total=checked+skipped
|
||||||
|
|
||||||
|
skip=coverage<random()
|
||||||
|
if skip:
|
||||||
|
skipped = skipped + 1
|
||||||
|
print("S {:.2f}%".format(checked * 100 / total))
|
||||||
|
|
||||||
|
skipinarow = skipinarow+1
|
||||||
|
if skipinarow>max_skip:
|
||||||
|
max_skip=skipinarow
|
||||||
|
else:
|
||||||
|
skipinarow=0
|
||||||
|
checked=checked+1
|
||||||
|
print("C {:.2f}%".format(checked * 100 / total))
|
||||||
|
|
||||||
|
print(max_skip)
|
||||||
|
|
||||||
|
skip=0
|
||||||
|
while True:
|
||||||
|
|
||||||
|
total=checked+skipped
|
||||||
|
if skip>0:
|
||||||
|
skip=skip-1
|
||||||
|
skipped = skipped + 1
|
||||||
|
print("S {:.2f}%".format(checked * 100 / total))
|
||||||
|
else:
|
||||||
|
checked=checked+1
|
||||||
|
print("C {:.2f}%".format(checked * 100 / total))
|
||||||
|
|
||||||
|
#calc new skip
|
||||||
|
skip=skip+((1/coverage)-1)*(random()*2)
|
||||||
|
# print(skip)
|
||||||
|
if skip> max_skip:
|
||||||
|
max_skip=skip
|
||||||
|
|
||||||
|
print(max_skip)
|
Loading…
x
Reference in New Issue
Block a user