completed progressive thinner class

This commit is contained in:
Edwin Eefting 2019-10-22 20:24:43 +02:00
parent 63d2091712
commit 34d0c5d67b

View File

@ -79,6 +79,174 @@ def abort(txt):
sys.exit(255) sys.exit(255)
class ThinnerRule:
    """a thinning schedule rule for Thinner

    A rule string has the form <period><ttl>, e.g. "1d1w": keep one object
    per period (1 day) as long as it is not older than the ttl (1 week).

    attributes (set by parse_rule):
        period:   size of a time block in seconds
        ttl:      maximum age in seconds for which this rule keeps objects
        rule_str: the normalised (stripped, lower-cased) rule string
    """

    # number of seconds per time unit ('m' is a month of 30 days, 'min' is a minute)
    TIME_NAMES={
        'y'   : 3600 * 24 * 365.25,
        'm'   : 3600 * 24 * 30,
        'w'   : 3600 * 24 * 7,
        'd'   : 3600 * 24,
        'h'   : 3600,
        'min' : 60,
        's'   : 1,
    }

    def __init__(self, rule_str):
        self.parse_rule(rule_str)

    def parse_rule(self, rule_str):
        """parse scheduling string

        example:
            daily snapshot, remove after a week:     1d1w
            weekly snapshot, remove after a month:   1w1m
            monthly snapshot, remove after 6 months: 1m6m
            yearly snapshot, remove after 2 year:    1y2y
            keep all snapshots, remove after a day   1s1d
            keep nothing:                            1s1s

        Surrounding whitespace is ignored, so "1d1w, 1w1m" can be split on commas.

        raises ValueError when the string is not of the form <amount><unit><amount><unit>
        raises Exception when a unit is unknown, the period is zero or the period is longer than the ttl
        """
        rule_str=rule_str.strip().lower()

        # Anchor the pattern and require every part, so missing amounts or trailing
        # garbage are reported clearly instead of failing inside int('').
        match=re.match(r"^([0-9]+)([a-z]+)([0-9]+)([a-z]+)$", rule_str)
        if not match:
            # ValueError is what int('') used to raise for malformed rules
            raise(ValueError("Invalid schedule rule: '{}'".format(rule_str)))

        (period_amount, period_unit, ttl_amount, ttl_unit)=match.groups()

        if period_unit not in self.TIME_NAMES:
            raise(Exception("Invalid period string in schedule: '{}'".format(rule_str)))

        if ttl_unit not in self.TIME_NAMES:
            raise(Exception("Invalid ttl string in schedule: '{}'".format(rule_str)))

        period=int(period_amount) * self.TIME_NAMES[period_unit]
        ttl=int(ttl_amount) * self.TIME_NAMES[ttl_unit]

        # Thinner divides timestamps by the period, so a zero period can never work
        if period==0:
            raise(Exception("Period cant be zero in schedule: '{}'".format(rule_str)))

        if period>ttl:
            raise(Exception("Period cant be longer than ttl in schedule: '{}'".format(rule_str)))

        # only store the result once the whole rule is known to be valid
        self.period=period
        self.ttl=ttl
        self.rule_str=rule_str

    def __str__(self):
        """get schedule as a schedule string"""
        return(self.rule_str)
class Thinner:
    """progressive thinner (universal, used for cleaning up snapshots)"""

    def __init__(self, schedule_str, always_keep=1):
        """
        schedule_str: comma separated list of ThinnerRule strings, e.g. "1d1w,1w1m"
        always_keep:  always keep the last X objects, whatever the rules say
        """
        self.always_keep=always_keep
        self.rules=[ ThinnerRule(rule_str) for rule_str in schedule_str.split(",") ]

    def run(self, objects, now=None):
        """thin list of objects with current schedule rules.

        objects: list of objects that have a timestamp-attribute with a unix timestamp
        now:     unix timestamp to use as the current time (defaults to time.time())

        return( keeps, removes )

        Per rule, the first object encountered in each period-sized time block is
        kept, as long as it is not older than the ttl of that rule. The last
        always_keep objects are kept unconditionally.
        """
        if len(objects)<=self.always_keep:
            return( (objects, []) )

        # compare with None, so an explicit now=0 is honoured
        if now is None:
            now=int(time.time())

        # Split on an explicit index: objects[:-0] would be empty and objects[-0:]
        # would be everything, so always_keep=0 would never remove anything.
        split_at=len(objects)-max(self.always_keep, 0)
        candidates=objects[:split_at]
        protected=objects[split_at:]

        # per period-size: the block numbers that already have an object
        time_blocks={}
        for rule in self.rules:
            time_blocks[rule.period]=set()

        keeps=[]
        removes=[]

        for candidate in candidates:
            timestamp=candidate.timestamp
            age=now-timestamp

            # Claim a block in every rule that still applies; do not stop at the
            # first hit, otherwise later objects would claim those blocks as well.
            keep=False
            for rule in self.rules:
                if age<=rule.ttl:
                    block_nr=int(timestamp/rule.period)
                    if block_nr not in time_blocks[rule.period]:
                        time_blocks[rule.period].add(block_nr)
                        keep=True

            if keep:
                keeps.append(candidate)
            else:
                removes.append(candidate)

        keeps.extend(protected)

        return( (keeps, removes) )
######### Thinner testing code
# simulated "current time": test() moves it forward (global now) and Thing.__str__ reads it to print ages
now=int(time.time())
# thinner under test: 1 per day for a week, 1 per week for a month, 1 per month for 6 months, 1 per year for 2 years
t=Thinner("1d1w,1w1m,1m6m,1y2y", always_keep=1)
# only needed by test() below
import random
class Thing:
    """dummy object for the Thinner test: has nothing but a timestamp"""

    def __init__(self, timestamp):
        """timestamp: unix timestamp of this thing"""
        self.timestamp=timestamp

    def __str__(self):
        """local date/time of the thing, with its age in whole days relative to the global now"""
        days_old=int((now-self.timestamp)/(3600*24))
        date_str=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(self.timestamp))
        return("{} ({} days old)".format(date_str, days_old))
def test():
    """interactive simulation of the thinner

    Every round prints the simulated time, thins the current things with the
    global thinner t, prints what is kept and removed, moves the simulated time
    forward and then waits for a line on stdin. Never returns.
    """
    global now

    things=[]
    while True:
        time_str=time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(now))
        print("#################### {}".format(time_str))

        (keeps, removes)=t.run(things, now)

        for (title, group) in ( ("### KEEP ", keeps), ("### REMOVE ", removes) ):
            print (title)
            for thing in group:
                print(thing)

        # only the kept things take part in the next round
        things=keeps

        #increase random amount of time and maybe add a thing
        now=now+random.randint(0,160000)
        # NOTE: random.random() is never negative, so a thing is added every round
        if random.random()>=0:
            things.append(Thing(now))

        # wait for enter before starting the next round
        sys.stdin.readline()
# NOTE(review): starts the interactive simulation as soon as this statement is executed;
# test() loops forever and waits on stdin every round, so nothing after this line is reached.
test()
class cached_property(object): class cached_property(object):
""" A property that is only computed once per instance and then replaces """ A property that is only computed once per instance and then replaces
itself with an ordinary attribute. Deleting the attribute resets the itself with an ordinary attribute. Deleting the attribute resets the
@ -297,10 +465,12 @@ class ZfsDataset():
#TODO: nicer? #TODO: nicer?
self._cached_properties={} self._cached_properties={}
def lstrip_path(self,count):
    """return name with first count components stripped"""
    components=self.name.split("/")
    remaining=components[count:]
    return("/".join(remaining))
def rstrip_path(self,count):
    """return name with last count components stripped

    count=0 returns the complete name.
    """
    components=self.name.split("/")
    # [:-0] is the same as [:0] and would strip everything, so handle 0 separately
    if count:
        components=components[:-count]
    return("/".join(components))
@ -312,12 +482,14 @@ class ZfsDataset():
(filesystem, snapshot_name)=self.name.split("@") (filesystem, snapshot_name)=self.name.split("@")
return(filesystem) return(filesystem)
@property
def snapshot_name(self):
    """snapshot part of the name (the part after the "@")"""
    # unpack into exactly two parts, so a name without exactly one "@" still raises ValueError
    (_filesystem, snapshot_part)=self.name.split("@")
    return(snapshot_part)
@property @property
def is_snapshot(self): def is_snapshot(self):
"""true if this dataset is a snapshot""" """true if this dataset is a snapshot"""
@ -336,12 +508,14 @@ class ZfsDataset():
else: else:
return(ZfsDataset(self.zfs_node, self.rstrip_path(1))) return(ZfsDataset(self.zfs_node, self.rstrip_path(1)))
@cached_property
def exists(self):
    """check if dataset exists

    Runs "zfs list <name>" read-only on the zfs node, accepting exit codes 0 and 1
    and hiding errors. Returns True when that produced any output, otherwise the
    (falsy) output itself.
    """
    self.debug("Checking if filesystem exists")
    list_cmd=[ "zfs", "list", self.name]
    listing=self.zfs_node.run(tab_split=True, cmd=list_cmd, readonly=True, valid_exitcodes=[ 0,1 ], hide_errors=True)
    return(listing and True)
def create_filesystem(self, parents=False): def create_filesystem(self, parents=False):
"""create a filesytem""" """create a filesytem"""
if parents: if parents:
@ -354,11 +528,13 @@ class ZfsDataset():
#update cache #update cache
self.exists=1 self.exists=1
def destroy(self):
    """run "zfs destroy" for this dataset on the zfs node and then call self.invalidate()"""
    self.debug("Destroying")
    destroy_cmd=["zfs", "destroy", self.name]
    self.zfs_node.run(destroy_cmd)
    self.invalidate()
@cached_property @cached_property
def properties(self): def properties(self):
"""all zfs properties""" """all zfs properties"""
@ -370,6 +546,7 @@ class ZfsDataset():
return(dict(self.zfs_node.run(tab_split=True, cmd=cmd, readonly=True, valid_exitcodes=[ 0 ]))) return(dict(self.zfs_node.run(tab_split=True, cmd=cmd, readonly=True, valid_exitcodes=[ 0 ])))
def is_changed(self): def is_changed(self):
"""dataset is changed since ANY latest snapshot ?""" """dataset is changed since ANY latest snapshot ?"""
self.debug("Checking if dataset is changed") self.debug("Checking if dataset is changed")
@ -379,6 +556,7 @@ class ZfsDataset():
else: else:
return(True) return(True)
def is_ours(self): def is_ours(self):
"""return true if this snapshot is created by this backup_nanme""" """return true if this snapshot is created by this backup_nanme"""
if re.match("^"+self.zfs_node.backup_name+"-[0-9]*$", self.snapshot_name): if re.match("^"+self.zfs_node.backup_name+"-[0-9]*$", self.snapshot_name):
@ -386,6 +564,19 @@ class ZfsDataset():
else: else:
return(False) return(False)
@property
def timestamp(self):
    """get unix timestamp (in seconds) from snapshot name. Only works for our own snapshots with the correct format.

    The snapshot name has to end with "-YYYYMMDDHHMMSS"; time.mktime() interprets
    that time as local time.

    raises Exception when the name doesn't end with a dash and a 14 digit timestamp
    """
    matches=re.findall("^.*-([0-9]*)$", self.snapshot_name)

    # no match at all (no trailing "-<digits>") is just as invalid as a wrong length
    if not matches or len(matches[0])!=14:
        raise(Exception("Snapshot has invalid timestamp in name: {}".format(self.snapshot_name)))

    #new format:
    time_secs=time.mktime(time.strptime(matches[0],"%Y%m%d%H%M%S"))

    # return the parsed seconds, not the string: Thinner does arithmetic on it
    return(time_secs)
def from_names(self, names): def from_names(self, names):
"""convert a list of names to a list ZfsDatasets for this zfs_node""" """convert a list of names to a list ZfsDatasets for this zfs_node"""
ret=[] ret=[]
@ -813,152 +1004,19 @@ class ZfsAutobackup:
raise raise
times=[] #times=[]
time_blocks={
'years' : 3600 * 24 * 365.25,
'months' : 3600 * 24 * 30,
'weeks' : 3600 * 24 * 7,
'days' : 3600 * 24,
'hours' : 3600,
'minutes' : 60,
}
now=int(time.time())
def thin(schedule, snapshots):
if len(snapshots)==0:
return(snapshots)
ret=[]
time_blocks={}
for ( period, ttl ) in schedule:
time_blocks[period]={}
# for snapshot in list(reversed(snapshots)):
#always keep latest
for snapshot in snapshots:
snapshot_time=snapshot
keeps=""
# just store in the correct time blocks, per period-size
for ( period, ttl ) in schedule:
block_nr=int(snapshot_time/period)
if not block_nr in time_blocks[period]:
time_blocks[period][block_nr]=[]
time_blocks[period][block_nr].append(snapshot_time)
#
keep=set() # test()
#
#now get the oldest one within the ttl, per block #
for ( period, ttl ) in schedule: #
for ( block_nr, snapshots ) in time_blocks[period].items():
for snapshot_time in sorted(snapshots):
age=now-snapshot_time
if age<ttl:
keep.add(snapshot_time)
break
return (sorted(keep))
# return(list(reversed(ret)))
#always keep latest!
# if not keeps and snapshots:
# # ret.append(snapshots[:-1])
# struct=time.localtime(snapshot_time)
# if keeps:
# ret.append(snapshot)
# print("{} {} {}days".format(time.strftime("%Y-%m-%d %H:%M:%S",struct),keeps,int(age/(3600*24))))
# # else:
# # print("{}".format(time.strftime("%Y-%m-%d %H:%M:%S",struct)))
# #
# #
# p(time_blocks)
# ret.append(snapshots[-1])
# struct=time.localtime(snapshots[-1])
# print("{}".format(time.strftime("%Y-%m-%d %H:%M:%S",struct)))
# return(ret)
# snapshots=range(now-400*24*3600, now, 24*3600)
schedule=[
#every ... keep for ...
( 1*time_blocks['days'] , 4 * time_blocks['days'] ),
( 1*time_blocks['weeks'] , 4 * time_blocks['weeks'] ),
( 1*time_blocks['months'], (6 * time_blocks['months']) ),
( 1*time_blocks['years'], 2* time_blocks['years'] ),
]
import random
def printsnap(s):
age=now-s
struct=time.localtime(s)
return("{} {}days".format(time.strftime("%Y-%m-%d %H:%M:%S",struct),int(age/(3600*24))))
def test():
global now
a=[]
b=[]
while True:
print("#################### {}".format(time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(now))))
# if random.random()>0.5:
a.append(now)
a=thin(schedule,a)
# b.append(now)
# b=thin(schedule,a, oldest=False)
b=[]
for count in range(0,max(len(a), len(b))):
sa=""
if count<len(a):
sa=printsnap(a[count])
sb=""
if count<len(b):
sb=printsnap(b[count])
print("{:15} | {:15}".format(sa,sb))
# for s in msnapshots:
# age=now-s
# struct=time.localtime(s)
# print("{} {}days".format(time.strftime("%Y-%m-%d %H:%M:%S",struct),int(age/(3600*24))))
sys.stdin.readline()
now=now+random.randint(0,800000)
# msnapshots.insert(0,now)
test()
# #
# #
# #