mirror of
https://github.com/psy0rz/zfs_autobackup.git
synced 2025-04-11 22:40:01 +03:00
wip
This commit is contained in:
parent
28ed44b1c8
commit
b68ca19e5f
@ -1,4 +1,6 @@
|
||||
import hashlib
|
||||
import os
|
||||
from random import random
|
||||
|
||||
|
||||
class BlockHasher():
|
||||
@ -12,14 +14,45 @@ class BlockHasher():
|
||||
Input and output generators are in the format ( chunk_nr, hexdigest )
|
||||
"""
|
||||
|
||||
def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1, coverage=1):
|
||||
def __init__(self, count=10000, bs=4096, hash_class=hashlib.sha1, skip=0):
|
||||
self.count = count
|
||||
self.bs = bs
|
||||
self.chunk_size=bs*count
|
||||
self.hash_class = hash_class
|
||||
self.coverage=1
|
||||
|
||||
self.stats_total=0
|
||||
self.stats_checked=0
|
||||
# self.coverage=coverage
|
||||
self.skip=skip
|
||||
self._skip_count=0
|
||||
|
||||
self.stats_total_bytes=0
|
||||
|
||||
|
||||
def _seek_next_chunk(self, fh, fsize):
|
||||
"""seek fh to next chunk and update skip counter.
|
||||
returns chunk_nr
|
||||
return false it should skip the rest of the file"""
|
||||
|
||||
#ignore rempty files
|
||||
if fsize==0:
|
||||
return False
|
||||
|
||||
# need to skip chunks?
|
||||
if self._skip_count > 0:
|
||||
chunks_left = ((fsize - fh.tell()) // self.chunk_size) + 1
|
||||
# not enough chunks left in this file?
|
||||
if self._skip_count >= chunks_left:
|
||||
# skip rest of this file
|
||||
self._skip_count = self._skip_count - chunks_left
|
||||
return False
|
||||
else:
|
||||
# seek to next chunk, reset skip count
|
||||
fh.seek(self.chunk_size * self._skip_count, os.SEEK_CUR)
|
||||
self._skip_count = self.skip
|
||||
return fh.tell()//self.chunk_size
|
||||
else:
|
||||
# should read this chunk, reset skip count
|
||||
self._skip_count = self.skip
|
||||
return fh.tell() // self.chunk_size
|
||||
|
||||
def generate(self, fname):
|
||||
"""Generates checksums
|
||||
@ -28,23 +61,37 @@ class BlockHasher():
|
||||
|
||||
yields nothing for empty files.
|
||||
"""
|
||||
with open(fname, "rb") as f:
|
||||
hash = self.hash_class()
|
||||
block_nr = 0
|
||||
chunk_nr = 0
|
||||
for block in iter(lambda: f.read(self.bs), b""):
|
||||
hash.update(block)
|
||||
block_nr = block_nr + 1
|
||||
if block_nr % self.count == 0:
|
||||
yield (chunk_nr, hash.hexdigest())
|
||||
chunk_nr = chunk_nr + 1
|
||||
hash = self.hash_class()
|
||||
with os.open(fname, os.O_RDONLY) as fh:
|
||||
print (os.lseek(fh, 0, os.SEEK_END))
|
||||
|
||||
|
||||
with os.openopen(fname, "rb") as fh:
|
||||
|
||||
# print(os.path.getsize(fname))
|
||||
print(os.lseek(fh, 0, os.SEEK_END))
|
||||
|
||||
fsize = fh.seek(0, os.SEEK_END)
|
||||
fh.seek(0)
|
||||
|
||||
while fh.tell()<fsize:
|
||||
|
||||
chunk_nr=self._seek_next_chunk(fh, fsize)
|
||||
if chunk_nr is False:
|
||||
return
|
||||
|
||||
#read chunk
|
||||
hash = self.hash_class()
|
||||
block_nr = 0
|
||||
while block_nr != self.count:
|
||||
block=fh.read(self.bs)
|
||||
if block==b"":
|
||||
break
|
||||
hash.update(block)
|
||||
block_nr = block_nr + 1
|
||||
|
||||
# yield last (incomplete) block
|
||||
if block_nr % self.count != 0:
|
||||
yield (chunk_nr, hash.hexdigest())
|
||||
|
||||
def compare(self, fname, generator):
|
||||
def compare(self, fname, generator):
|
||||
"""reads from generator and compares blocks
|
||||
Yields mismatches in the form: ( chunk_nr, hexdigest, actual_hexdigest)
|
||||
Yields errors in the form: ( chunk_nr, hexdigest, "message" )
|
||||
|
@ -20,10 +20,7 @@ class ZfsCheck(CliBase):
|
||||
|
||||
self.node = ZfsNode(self.log, readonly=self.args.test, debug_output=self.args.debug_output)
|
||||
|
||||
if self.args.check is None:
|
||||
self.block_hasher = BlockHasher(count=self.args.count, bs=self.args.block_size)
|
||||
else:
|
||||
self.block_hasher = BlockHasher(count=self.args.count, bs=self.args.block_size, coverage=self.args.percentage)
|
||||
self.block_hasher = BlockHasher(count=self.args.count, bs=self.args.block_size, skip=self.args.skip)
|
||||
|
||||
def get_parser(self):
|
||||
|
||||
@ -37,13 +34,13 @@ class ZfsCheck(CliBase):
|
||||
group.add_argument('--block-size', metavar="BYTES", default=4096, help="Read block-size, default %(default)s",
|
||||
type=int)
|
||||
group.add_argument('--count', metavar="COUNT", default=int((100 * (1024 ** 2)) / 4096),
|
||||
help="Hash chunks of COUNT blocks. Default %(default)s . (Chunk size is BYTES * COUNT) ", type=int) # 100MiB
|
||||
help="Hash chunks of COUNT blocks. Default %(default)s . (CHUNK size is BYTES * COUNT) ", type=int) # 100MiB
|
||||
|
||||
group.add_argument('--check', '-c', metavar="FILE", default=None, const=True, nargs='?',
|
||||
help="Read hashes from STDIN (or FILE) and compare them")
|
||||
|
||||
group.add_argument('--percentage', '-p', metavar="NUMBER", default=100, type=float,
|
||||
help="Generate/compare only this percentage of hashes. Default %(default)s")
|
||||
group.add_argument('--skip', '-s', metavar="NUMBER", default=0, type=float,
|
||||
help="Skip this number of chunks after every hash. %(default)s")
|
||||
|
||||
return parser
|
||||
|
||||
@ -61,10 +58,9 @@ class ZfsCheck(CliBase):
|
||||
self.verbose("Block size : {} bytes".format(args.block_size))
|
||||
self.verbose("Block count : {}".format(args.count))
|
||||
self.verbose("Effective chunk size : {} bytes".format(args.count*args.block_size))
|
||||
self.verbose("Percentage to check : {} %".format(args.percentage))
|
||||
self.verbose("Skip chunk count : {} (checks {:.2f}% of data)".format(args.skip, 100/(1+args.skip)))
|
||||
self.verbose("")
|
||||
|
||||
args.percentage=args.percentage/100
|
||||
|
||||
return args
|
||||
|
||||
@ -216,28 +212,30 @@ class ZfsCheck(CliBase):
|
||||
|
||||
last_progress_time = time.time()
|
||||
progress_checked = 0
|
||||
progress_total = 0
|
||||
progress_skipped = 0
|
||||
|
||||
line=input_fh.readline()
|
||||
skip=0
|
||||
while line:
|
||||
i=line.rstrip().split("\t")
|
||||
#ignores lines without tabs
|
||||
if (len(i)>1):
|
||||
|
||||
if self.args.percentage==1 or self.args.percentage>random():
|
||||
if skip==0:
|
||||
progress_checked=progress_checked+1
|
||||
yield i
|
||||
|
||||
progress_total=progress_total+1
|
||||
skip=self.args.skip
|
||||
else:
|
||||
skip=skip-1
|
||||
progress_skipped=progress_skipped+1
|
||||
|
||||
if self.args.progress and time.time() - last_progress_time > 1:
|
||||
last_progress_time = time.time()
|
||||
self.progress("Checked {}/{} hashes. ({:.2f}% coverage)".format(progress_checked, progress_total, (float(progress_checked)/progress_total)*100))
|
||||
self.progress("Checked {} hashes (skipped {})".format(progress_checked, progress_skipped))
|
||||
|
||||
line=input_fh.readline()
|
||||
|
||||
self.verbose("Checked {}/{} hashes. ({:.2f}% coverage)".format(progress_checked, progress_total, (
|
||||
float(progress_checked) / progress_total) * 100))
|
||||
self.verbose("Checked {} hashes (skipped {})".format(progress_checked, progress_skipped))
|
||||
|
||||
def run(self):
|
||||
|
||||
|
@ -0,0 +1,70 @@
|
||||
"""Scratch script: times file-size lookups via seek and sketches chunk-skip strategies.

NOTE(review): the original indentation of this script was lost; the nesting
below is reconstructed from the statement order and should be confirmed.
"""
import os.path
import os
import sys  # needed for sys.exit() below; the name was used without being imported
import time
from random import random

# Micro-benchmark: how long do one million seek-to-end calls take?
with open('test.py', 'rb') as fh:

    # fsize = fh.seek(10000, os.SEEK_END)
    # print(fsize)

    start = time.time()
    for i in range(0, 1000000):
        # fh.seek(0, 0)
        fsize = fh.seek(0, os.SEEK_END)
        # fsize=fh.tell()
        # os.path.getsize('test.py')
    print(time.time() - start)

    # position after the last seek-to-end
    print(fh.tell())

# Stop here: everything below is unreachable experimentation code.
sys.exit(0)


checked = 1
skipped = 1
coverage = 0.1

max_skip = 0


# Experiment 1: decide per chunk, at random, whether to skip it, and track
# the longest run of consecutive skips.
skipinarow = 0
while True:
    total = checked + skipped

    skip = coverage < random()
    if skip:
        skipped = skipped + 1
        print("S {:.2f}%".format(checked * 100 / total))

        skipinarow = skipinarow + 1
        if skipinarow > max_skip:
            max_skip = skipinarow
    else:
        skipinarow = 0
        checked = checked + 1
        print("C {:.2f}%".format(checked * 100 / total))

    # NOTE(review): placed inside the loop, since after a `while True` without
    # a break it could never run — confirm against the original file.
    print(max_skip)


# Experiment 2: after every checked chunk, draw a random skip distance whose
# mean is (1/coverage)-1.
skip = 0
while True:

    total = checked + skipped
    if skip > 0:
        skip = skip - 1
        skipped = skipped + 1
        print("S {:.2f}%".format(checked * 100 / total))
    else:
        checked = checked + 1
        print("C {:.2f}%".format(checked * 100 / total))

        # calc new skip
        skip = skip + ((1 / coverage) - 1) * (random() * 2)
        # print(skip)
        if skip > max_skip:
            max_skip = skip

    # NOTE(review): same placement assumption as in the first loop.
    print(max_skip)
|
Loading…
x
Reference in New Issue
Block a user