#!/usr/bin/env python
# -*- coding: utf8 -*-
# (c)edwin@datux.nl - Released under GPL
#
# Greetings from eth0 2019 :)
from __future__ import print_function
import os
import sys
import re
import traceback
import subprocess
import time
import argparse
import select
try:
import colorama
use_color=True
except ImportError:
use_color=False
class Log:
def __init__(self, show_debug=False, show_verbose=False):
self.last_log=""
self.show_debug=show_debug
self.show_verbose=show_verbose
def error(self, txt):
if use_color:
print(colorama.Fore.RED+colorama.Style.BRIGHT+ "! "+txt+colorama.Style.RESET_ALL, file=sys.stderr)
else:
print("! "+txt, file=sys.stderr)
def verbose(self, txt):
if self.show_verbose:
if use_color:
print(colorama.Style.NORMAL+ " "+txt+colorama.Style.RESET_ALL)
else:
print(" "+txt)
def debug(self, txt):
if self.show_debug:
if use_color:
print(colorama.Fore.GREEN+ "# "+txt+colorama.Style.RESET_ALL)
else:
print("# "+txt)
#fatal: abort execution with exit code 255
def abort(txt):
print(txt, file=sys.stderr)
sys.exit(255)
class ThinnerRule:
"""a thinning schedule rule for Thinner"""
TIME_NAMES={
'y' : 3600 * 24 * 365.25,
'm' : 3600 * 24 * 30,
'w' : 3600 * 24 * 7,
'd' : 3600 * 24,
'h' : 3600,
'min' : 60,
's' : 1,
}
TIME_DESC={
'y' : 'year',
'm' : 'month',
'w' : 'week',
'd' : 'day',
'h' : 'hour',
'min' : 'minute',
's' : 'second',
}
def parse_rule(self, rule_str):
"""parse a scheduling rule string
examples:
daily snapshot, remove after a week: 1d1w
weekly snapshot, remove after a month: 1w1m
monthly snapshot, remove after 6 months: 1m6m
yearly snapshot, remove after 2 years: 1y2y
keep all snapshots, remove after a day: 1s1d
keep nothing: 1s1s
"""
rule_str=rule_str.lower()
matches=re.findall("([0-9]*)([a-z]*)([0-9]*)([a-z]*)", rule_str)[0]
period_amount=int(matches[0])
period_unit=matches[1]
ttl_amount=int(matches[2])
ttl_unit=matches[3]
if period_unit not in self.TIME_NAMES:
raise(Exception("Invalid period string in schedule: '{}'".format(rule_str)))
if ttl_unit not in self.TIME_NAMES:
raise(Exception("Invalid ttl string in schedule: '{}'".format(rule_str)))
self.period=period_amount * self.TIME_NAMES[period_unit]
self.ttl=ttl_amount * self.TIME_NAMES[ttl_unit]
if self.period>self.ttl:
raise(Exception("Period can't be longer than ttl in schedule: '{}'".format(rule_str)))
self.rule_str=rule_str
self.human_str="Keep oldest of {} {}{}, delete after {} {}{}.".format(
period_amount, self.TIME_DESC[period_unit], period_amount!=1 and "s" or "", ttl_amount, self.TIME_DESC[ttl_unit], ttl_amount!=1 and "s" or "" )
def __str__(self):
"""get schedule as a schedule string"""
return(self.rule_str)
def __init__(self, rule_str):
self.parse_rule(rule_str)
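# Illustrative example of how a rule string is parsed (values follow TIME_NAMES above):
# rule=ThinnerRule("1d1w")
# rule.period -> 86400 (keep at most one snapshot per day)
# rule.ttl -> 604800 (delete it after a week)
# rule.human_str -> "Keep oldest of 1 day, delete after 1 week."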
class Thinner:
"""progressive thinner (universal, used for cleaning up snapshots)"""
def __init__(self, schedule_str=""):
"""schedule_str: comma-separated list of ThinnerRules. A plain number specifies how many snapshots to always keep.
"""
self.rules=[]
self.always_keep=0
if schedule_str=="":
return
rule_strs=schedule_str.split(",")
for rule_str in rule_strs:
if rule_str.isdigit():
self.always_keep=int(rule_str)
if self.always_keep<0:
raise(Exception("Number of snapshots to keep can't be negative: {}".format(self.always_keep)))
else:
self.rules.append(ThinnerRule(rule_str))
def human_rules(self):
"""get list of human readable rules"""
ret=[]
if self.always_keep:
ret.append("Keep the last {} snapshot{}.".format(self.always_keep, self.always_keep!=1 and "s" or ""))
for rule in self.rules:
ret.append(rule.human_str)
return(ret)
def thin(self, objects, keep_objects=[], now=None):
"""thin the list of objects according to the current schedule rules.
objects: list of objects to thin. every object should have a timestamp attribute.
keep_objects: objects to always keep (these should also be in the normal objects list, so we can still use them to decide which other objects are obsolete)
returns: ( keeps, removes )
"""
#always keep a number of the most recent objects?
if self.always_keep:
#all of them
if len(objects)<=self.always_keep:
return ( (objects, []) )
#determine which ones
always_keep_objects=objects[-self.always_keep:]
else:
always_keep_objects=[]
#determine time blocks
time_blocks={}
for rule in self.rules:
time_blocks[rule.period]={}
if not now:
now=int(time.time())
keeps=[]
removes=[]
#traverse objects
for obj in objects:
#important that these are ints!
timestamp=int(obj.timestamp)
age=int(now)-timestamp
# store in the correct time blocks, per period-size, if not too old yet
# e.g.: check if there is ANY time block that wants to keep this object
keep=False
for rule in self.rules:
if age<=rule.ttl:
block_nr=int(timestamp/rule.period)
if block_nr not in time_blocks[rule.period]:
time_blocks[rule.period][block_nr]=True
keep=True
#keep it according to schedule, or because it is in the keep_objects list
if keep or obj in keep_objects or obj in always_keep_objects:
keeps.append(obj)
else:
removes.append(obj)
return( (keeps, removes) )
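# Illustrative example: a schedule that always keeps the last 10 snapshots and
# also keeps dailies for a week and weeklies for a month:
# thinner=Thinner("10,1d1w,1w1m")
# (keeps, removes)=thinner.thin(snapshots) #objects only need a .timestamp attribute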
# ######### Thinner testing code
# now=int(time.time())
#
# t=Thinner("1,1d1w,1w1m,1m6m,1y2y")
#
# import random
#
# class Thing:
# def __init__(self, timestamp):
# self.timestamp=timestamp
#
# def __str__(self):
# age=now-self.timestamp
# struct=time.localtime(self.timestamp)
# return("{} ({} days old)".format(time.strftime("%Y-%m-%d %H:%M:%S",struct),int(age/(3600*24))))
#
# def test():
# global now
# things=[]
#
# while True:
# print("#################### {}".format(time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(now))))
#
# (keeps, removes)=t.thin(things, now=now)
#
# print ("### KEEP ")
# for thing in keeps:
# print(thing)
#
# print ("### REMOVE ")
# for thing in removes:
# print(thing)
#
# things=keeps
#
# #increase random amount of time and maybe add a thing
# now=now+random.randint(0,160000)
# if random.random()>=0:
# things.append(Thing(now))
#
# sys.stdin.readline()
#
# test()
class cached_property(object):
""" A property that is only computed once per instance. The result is cached
in the instance's _cached_properties dict; clearing that dict resets the
property (see ZfsDataset.invalidate).
Adapted from: https://github.com/bottlepy/bottle/commit/fa7733e075da0d790d809aa3d2f53071897e6f76
"""
def __init__(self, func):
self.__doc__ = getattr(func, '__doc__')
self.func = func
def __get__(self, obj, cls):
if obj is None:
return self
propname=self.func.__name__
#store the result in _cached_properties so it is cached from now on
if not hasattr(obj, '_cached_properties'):
obj._cached_properties={}
if propname not in obj._cached_properties:
obj._cached_properties[propname]=self.func(obj)
return obj._cached_properties[propname]
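# Illustrative sketch of the caching behaviour (Demo is a made-up class):
# class Demo:
# @cached_property
# def answer(self):
# print("computing")
# return(42)
# d=Demo()
# d.answer #prints "computing" and returns 42
# d.answer #returns 42 straight from d._cached_properties, no recompute
# d._cached_properties={} #clearing the cache (like ZfsDataset.invalidate) forces a recompute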
class ExecuteNode:
"""an endpoint to execute local or remote commands via ssh"""
def __init__(self, ssh_to=None, readonly=False, debug_output=False):
"""ssh_to: server you want to ssh to. None means local.
readonly: only execute commands that don't make any changes (useful for test-runs)
debug_output: show output and exit codes of commands in debugging output.
"""
self.ssh_to=ssh_to
self.readonly=readonly
self.debug_output=debug_output
def __repr__(self):
if self.ssh_to is None:
return("(local)")
else:
return(self.ssh_to)
def run(self, cmd, input=None, tab_split=False, valid_exitcodes=[ 0 ], readonly=False, hide_errors=False, pipe=False):
"""run a command on the node
readonly: set to True if the command doesn't make any changes and is safe to execute in testmode
pipe: Instead of executing, return a pipe-handle to be used as input to another run() command. (just like a | in linux)
input: Can be None, a string, or a pipe-handle you got from another run()
"""
encoded_cmd=[]
#use ssh?
if self.ssh_to is not None:
encoded_cmd.extend(["ssh", self.ssh_to])
#make sure the command gets all the data in utf8 format:
#(this is necessary if LC_ALL=en_US.utf8 is not set in the environment)
for arg in cmd:
#add single quotes for remote commands to support spaces and other weird stuff (remote commands are executed in a shell)
encoded_cmd.append( ("'"+arg+"'").encode('utf-8'))
else:
for arg in cmd:
encoded_cmd.append(arg.encode('utf-8'))
#debug and test stuff
debug_txt=" ".join(encoded_cmd)
if pipe:
debug_txt=debug_txt+" |"
if self.readonly and not readonly:
self.debug("SKIP > "+ debug_txt)
else:
if pipe:
self.debug("PIPE > "+ debug_txt)
else:
self.debug("RUN > "+ debug_txt)
#determine stdin
if input is None:
stdin=None
elif isinstance(input,str):
self.debug("INPUT > \n"+input.rstrip())
stdin=subprocess.PIPE
elif isinstance(input, subprocess.Popen):
self.debug("Piping input")
stdin=input.stdout
else:
abort("Incompatible input")
if self.readonly and not readonly:
#todo: what happens if input is piped?
return
#execute and parse/return results
p=subprocess.Popen(encoded_cmd, env=os.environ, stdout=subprocess.PIPE, stdin=stdin, stderr=subprocess.PIPE)
#Note: make streaming?
if isinstance(input,str):
p.stdin.write(input)
p.stdin.close() #signal EOF, otherwise the child process may block waiting for more input
if pipe:
return(p)
#handle all outputs
if isinstance(input, subprocess.Popen):
selectors=[p.stdout, p.stderr, input.stderr ]
input.stdout.close() #otherwise the input process won't exit when ours does
else:
selectors=[p.stdout, p.stderr ]
output_lines=[]
while True:
(read_ready, write_ready, ex_ready)=select.select(selectors, [], [])
eof_count=0
if p.stdout in read_ready:
line=p.stdout.readline()
if line!="":
output_lines.append(line.rstrip())
if self.debug_output:
self.debug("STDOUT > "+line.rstrip())
else:
eof_count=eof_count+1
if p.stderr in read_ready:
line=p.stderr.readline()
if line!="":
if hide_errors:
self.debug("STDERR > "+line.rstrip())
else:
self.error("STDERR > "+line.rstrip())
else:
eof_count=eof_count+1
if isinstance(input, subprocess.Popen) and (input.stderr in read_ready):
line=input.stderr.readline()
if line!="":
if hide_errors:
self.debug("STDERR|> "+line.rstrip())
else:
self.error("STDERR|> "+line.rstrip())
else:
eof_count=eof_count+1
#stop if both processes are done and all filehandles are EOF:
if p.poll() is not None and ((not isinstance(input, subprocess.Popen)) or input.poll() is not None) and eof_count==len(selectors):
break
#handle piped process error output and exit codes
if isinstance(input, subprocess.Popen):
if self.debug_output:
self.debug("EXIT |> {}".format(input.returncode))
if input.returncode not in valid_exitcodes:
raise(subprocess.CalledProcessError(input.returncode, "(pipe)"))
if self.debug_output:
self.debug("EXIT > {}".format(p.returncode))
if p.returncode not in valid_exitcodes:
raise(subprocess.CalledProcessError(p.returncode, encoded_cmd))
if not tab_split:
return(output_lines)
else:
ret=[]
for line in output_lines:
ret.append(line.split("\t"))
return(ret)
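# Illustrative usage (assumes passwordless ssh to the example host is set up):
# local=ExecuteNode()
# remote=ExecuteNode(ssh_to="root@backupserver")
# names=local.run(["zfs", "list", "-H", "-o", "name"], readonly=True)
# #pipe a local send into a remote receive, just like a shell |
# pipe=local.run(["zfs", "send", "rpool/data@snap"], pipe=True)
# remote.run(["zfs", "recv", "backup/data"], input=pipe)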
class ZfsDataset():
"""a zfs dataset (filesystem/volume/snapshot/clone)
Note that a dataset doesnt have to actually exist (yet/anymore)
Also most properties are cached for performance-reasons, but also to allow --test to function correctly.
"""
def __init__(self, zfs_node, name, force_exists=None):
"""name: full path of the zfs dataset
exists: specifiy if you already know a dataset exists or not. for performance reasons. (othewise it will have to check with zfs list when needed)
"""
self.zfs_node=zfs_node
self.name=name #full name
self.force_exists=force_exists
def __repr__(self):
return("{}: {}".format(self.zfs_node, self.name))
def __str__(self):
return(self.name)
def __eq__(self, obj):
return(self.name == obj.name)
def verbose(self,txt):
self.zfs_node.verbose("{}: {}".format(self.name, txt))
def error(self,txt):
self.zfs_node.error("{}: {}".format(self.name, txt))
def debug(self,txt):
self.zfs_node.debug("{}: {}".format(self.name, txt))
def invalidate(self):
"""clear cache"""
#TODO: nicer?
self._cached_properties={}
self.force_exists=None
def lstrip_path(self,count):
"""return name with first count components stripped"""
return("/".join(self.name.split("/")[count:]))
def rstrip_path(self,count):
"""return name with last count components stripped"""
return("/".join(self.name.split("/")[:-count]))
@property
def filesystem_name(self):
"""filesystem part of the name (before the @)"""
if self.is_snapshot:
( filesystem, snapshot )=self.name.split("@")
return(filesystem)
else:
return(self.name)
@property
def snapshot_name(self):
"""snapshot part of the name"""
if not self.is_snapshot:
raise(Exception("This is not a snapshot"))
(filesystem, snapshot_name)=self.name.split("@")
return(snapshot_name)
@property
def is_snapshot(self):
"""true if this dataset is a snapshot"""
return(self.name.find("@")!=-1)
@cached_property
def parent(self):
"""get zfs-parent of this dataset.
for snapshots this means it will get the filesystem/volume that it belongs to. otherwise it will return the parent according to path
we cache this so everything in the parent that is cached also stays.
"""
if self.is_snapshot:
return(ZfsDataset(self.zfs_node, self.filesystem_name))
else:
return(ZfsDataset(self.zfs_node, self.rstrip_path(1)))
def find_our_prev_snapshot(self, snapshot):
"""find our previous snapshot in this dataset. None if it doesn't exist"""
if self.is_snapshot:
raise(Exception("Please call this on a dataset."))
try:
index=self.our_snapshots.index(snapshot)
if index>0:
return(self.our_snapshots[index-1])
else:
return(None)
except ValueError:
return(None)
def find_our_next_snapshot(self, snapshot):
"""find our next snapshot in this dataset. None if it doesn't exist"""
if self.is_snapshot:
raise(Exception("Please call this on a dataset."))
try:
index=self.our_snapshots.index(snapshot)
if index>=0 and index<len(self.our_snapshots)-1:
return(self.our_snapshots[index+1])
else:
return(None)
except ValueError:
return(None)
@cached_property
def exists(self):
"""check if the dataset exists.
Set force_exists to cache a specific value if you already know it. Useful for performance reasons."""
if self.force_exists is not None:
self.debug("Checking if filesystem exists: forced to {}".format(self.force_exists))
return(self.force_exists)
else:
self.debug("Checking if filesystem exists")
return(bool(self.zfs_node.run(tab_split=True, cmd=[ "zfs", "list", self.name], readonly=True, valid_exitcodes=[ 0,1 ], hide_errors=True)))
def create_filesystem(self, parents=False):
"""create a filesytem"""
if parents:
self.verbose("Creating filesystem and parents")
self.zfs_node.run(["zfs", "create", "-p", self.name ])
else:
self.verbose("Creating filesystem")
self.zfs_node.run(["zfs", "create", self.name ])
#update cache (invalidate first, the old value may already be cached)
self.invalidate()
self.force_exists=True
def destroy(self):
self.verbose("Destroying")
self.zfs_node.run(["zfs", "destroy", self.name])
self.invalidate()
self.force_exists=False
@cached_property
def properties(self):
"""all zfs properties"""
self.debug("Getting zfs properties")
cmd=[
"zfs", "get", "-H", "-o", "property,value", "all", self.name
]
if not self.exists:
return({})
return(dict(self.zfs_node.run(tab_split=True, cmd=cmd, readonly=True, valid_exitcodes=[ 0 ])))
def is_changed(self):
"""dataset is changed since ANY latest snapshot ?"""
self.debug("Checking if dataset is changed")
if self.properties['written']=="0B" or self.properties['written']=="0":
return(False)
else:
return(True)
def is_ours(self):
"""return true if this snapshot is created by this backup_nanme"""
if re.match("^"+self.zfs_node.backup_name+"-[0-9]*$", self.snapshot_name):
return(True)
else:
return(False)
@property
def timestamp(self):
"""get timestamp from snapshot name. Only works for our own snapshots with the correct format."""
time_str=re.findall("^.*-([0-9]*)$", self.snapshot_name)[0]
if len(time_str)!=14:
raise(Exception("Snapshot has invalid timestamp in name: {}".format(self.snapshot_name)))
#new format:
time_secs=time.mktime(time.strptime(time_str,"%Y%m%d%H%M%S"))
return(time_secs)
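# Illustrative example: a snapshot named "rpool/data@offsite1-20191027111641" has
# snapshot_name "offsite1-20191027111641"; the trailing 14 digits are parsed with
# %Y%m%d%H%M%S and converted to unix time (local timezone) by time.mktime.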
def from_names(self, names):
"""convert a list of names to a list ZfsDatasets for this zfs_node"""
ret=[]
for name in names:
ret.append(ZfsDataset(self.zfs_node, name))
return(ret)
@cached_property
def snapshots(self):
"""get all snapshots of this dataset"""
self.debug("Getting snapshots")
if not self.exists:
return([])
cmd=[
"zfs", "list", "-d", "1", "-r", "-t" ,"snapshot", "-H", "-o", "name", self.name
]
names=self.zfs_node.run(cmd=cmd, readonly=True)
return(self.from_names(names))
@property
def our_snapshots(self):
"""get list of snapshots creates by us of this dataset"""
ret=[]
for snapshot in self.snapshots:
if snapshot.is_ours():
ret.append(snapshot)
return(ret)
# def progressive_thinning(self, schedule):
# """cleanup snapshots by progressive thinning schedule"""
def find_snapshot(self, snapshot_name):
"""find snapshot by snapshot_name"""
for snapshot in self.our_snapshots:
if snapshot.snapshot_name==snapshot_name:
return(snapshot)
return(None)
@cached_property
def is_changed_ours(self):
"""dataset is changed since OUR latest snapshot?"""
self.debug("Checking if dataset is changed since our snapshot")
if not self.our_snapshots:
return(True)
latest_snapshot=self.our_snapshots[-1]
cmd=[ "zfs", "get", "-H", "-o", "value", "written@"+str(latest_snapshot), self.name ]
output=self.zfs_node.run(readonly=True, tab_split=False, cmd=cmd, valid_exitcodes=[ 0 ])
if output[0]=="0B" or output[0]=="0":
return(False)
return(True)
@cached_property
def recursive_datasets(self, types="filesystem,volume"):
"""get all datasets recursively under us (note: as a cached_property this is always called with the default types)"""
self.debug("Getting all datasets under us")
names=self.zfs_node.run(tab_split=False, readonly=True, valid_exitcodes=[ 0 ], cmd=[
"zfs", "list", "-r", "-t", types, "-o", "name", "-H", self.name
])
return(self.from_names(names[1:]))
def send_pipe(self, prev_snapshot=None, resume=True, resume_token=None, show_progress=False):
"""returns a pipe with zfs send output for this snapshot
resume: Use resuming (both sides need to support it)
resume_token: resume sending from this token. (in that case we dont need to know snapshot names)
"""
#### build source command
cmd=[]
cmd.extend(["zfs", "send", ])
#all kinds of performance options:
cmd.append("-L") # large block support
cmd.append("-e") # WRITE_EMBEDDED, more compact stream
cmd.append("-c") # use compressed WRITE records
if not resume:
cmd.append("-D") # dedupped stream, sends less duplicate data
#progress output
if show_progress:
cmd.append("-v")
cmd.append("-P")
#resume a previous send? (don't need more parameters in that case)
if resume_token:
cmd.extend([ "-t", resume_token ])
else:
#send properties
cmd.append("-p")
#incremental?
if prev_snapshot:
cmd.extend([ "-i", prev_snapshot.snapshot_name ])
cmd.append(self.name)
# if args.buffer and args.ssh_source!="local":
# cmd.append("|mbuffer -m {}".format(args.buffer))
#NOTE: this doesn't start the send yet, it only returns a subprocess.Popen to use as a pipe
return(self.zfs_node.run(cmd, pipe=True))
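# Illustrative example: for an incremental, resumable send with progress enabled,
# the command built above looks roughly like:
# zfs send -L -e -c -v -P -p -i offsite1-20191026000000 rpool/data@offsite1-20191027111641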
def recv_pipe(self, pipe, resume=True):
"""starts a zfs recv for this snapshot and uses pipe as input
note: you can also call both a snapshot and filesystem object.
the resulting zfs command is the same, only our object cache is invalidated differently.
"""
#### build target command
cmd=[]
cmd.extend(["zfs", "recv"])
#don't mount the filesystem that is received
cmd.append("-u")
# filter certain properties on receive (useful for linux->freebsd in some cases)
# if args.filter_properties:
# for filter_property in args.filter_properties:
# cmd.extend([ "-x" , filter_property ])
#verbose output
cmd.append("-v")
if resume:
#support resuming
cmd.append("-s")
cmd.append(self.filesystem_name)
self.zfs_node.run(cmd, input=pipe)
#invalidate cache, but we at least know we exist now
self.invalidate()
#in test mode we assume everything was ok and it exists
if self.zfs_node.readonly:
self.force_exists=True
#check if the transfer really was ok (exit codes have been wrong before and can be ignored by some parameters)
if not self.exists:
raise(Exception("Target doesn't exist after transfer, something went wrong."))
# if args.buffer and args.ssh_target!="local":
# cmd.append("|mbuffer -m {}".format(args.buffer))
def transfer_snapshot(self, target_snapshot, prev_snapshot=None, resume=True, show_progress=False):
"""transfer this snapshot to target_snapshot. specify prev_snapshot for incremental transfer
connects a send_pipe() to recv_pipe()
"""
self.debug("Transfer snapshot to {}".format(target_snapshot.filesystem_name))
#initial or incremental?
if not prev_snapshot:
target_snapshot.verbose("receiving full {}".format(self.snapshot_name))
else:
#incremental
target_snapshot.verbose("receiving incremental {}".format(self.snapshot_name))
#do it
pipe=self.send_pipe(resume=resume, show_progress=show_progress, prev_snapshot=prev_snapshot)
target_snapshot.recv_pipe(pipe, resume=resume)
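# Illustrative example (dataset names are made up), wiring send_pipe() into recv_pipe():
# prev=source_fs.find_our_prev_snapshot(snapshot)
# snapshot.transfer_snapshot(target_snapshot, prev_snapshot=prev, show_progress=True)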
def resume_transfer(self, target_dataset, show_progress=False):
"""resume an interrupted transfer, if there is one"""
#resume is a kind of special case since we don't know which snapshot we are transferring. (it's encoded in the resume token)
if 'receive_resume_token' in target_dataset.properties:
target_dataset.verbose("resuming")
#just send and recv on dataset instead of snapshot object.
pipe=self.send_pipe(show_progress=show_progress, resume_token=target_dataset.properties['receive_resume_token'])
target_dataset.recv_pipe(pipe,resume=True)
return(True)
return(False)
def thin(self, keeps=[]):
"""determine which of our snapshots should be kept or deleted, based on the thinning schedule. cull the herd!
keeps: list of snapshots to always keep (usually the latest)
returns: ( keeps, obsoletes )
"""
return(self.zfs_node.thinner.thin(self.our_snapshots, keep_objects=keeps))
def find_common_snapshot(self, target_dataset):
"""find latest coommon snapshot between us and target
returns None if its an initial transfer
"""
if not target_dataset.our_snapshots:
#target has nothing yet
return(None)
else:
snapshot=self.find_snapshot(target_dataset.our_snapshots[-1].snapshot_name)
if not snapshot:
raise(Exception("Cant find latest target snapshot on source"))
snapshot.debug("common snapshot")
return(snapshot)
def sync_snapshots(self, target_dataset, show_progress=False, resume=True):
"""sync our snapshots to target_dataset"""
#resume something first?
if self.resume_transfer(target_dataset, show_progress):
#running in readonly mode and no snapshots yet? assume initial snapshot (otherwise we can't find the common snapshot in the next step)
if self.zfs_node.readonly and not target_dataset.our_snapshots:
target_dataset.snapshots.append(ZfsDataset(target_dataset.zfs_node, target_dataset.name + "@" + self.our_snapshots[0].snapshot_name))
#determine start snapshot (the first snapshot after the common snapshot)
target_dataset.debug("Determining start snapshot")
common_snapshot=self.find_common_snapshot(target_dataset)
if not common_snapshot:
#start from beginning
start_snapshot=self.our_snapshots[0]
else:
start_snapshot=self.find_our_next_snapshot(common_snapshot)
#create virtual target snapshots
target_dataset.debug("Creating virtual target snapshots")
source_snapshot=start_snapshot
while source_snapshot:
#create virtual target snapshot
virtual_snapshot=ZfsDataset(target_dataset.zfs_node, target_dataset.filesystem_name+"@"+source_snapshot.snapshot_name,force_exists=False)
target_dataset.snapshots.append(virtual_snapshot)
source_snapshot=self.find_our_next_snapshot(source_snapshot)
#now let thinner decide what we want on both sides
self.debug("Create thinning list")
(source_keeps, source_obsoletes)=self.thin(keeps=[self.our_snapshots[-1]])
(target_keeps, target_obsoletes)=target_dataset.thin(keeps=[target_dataset.our_snapshots[-1]])
#stuff that is from before the common snapshot can be deleted right away
if common_snapshot:
for source_snapshot in self.our_snapshots:
if source_snapshot.snapshot_name==common_snapshot.snapshot_name:
break
if source_snapshot in source_obsoletes:
source_snapshot.destroy()
for target_snapshot in target_dataset.our_snapshots:
if target_snapshot.snapshot_name==common_snapshot.snapshot_name:
break
if target_snapshot in target_obsoletes:
target_snapshot.destroy()
#now send/destroy the rest of the source snapshots
prev_source_snapshot=common_snapshot
source_snapshot=start_snapshot
while source_snapshot:
target_snapshot=target_dataset.find_snapshot(source_snapshot.snapshot_name) #"virtual"
#does target actually want it?
if target_snapshot in target_keeps:
source_snapshot.transfer_snapshot(target_snapshot, prev_snapshot=prev_source_snapshot, show_progress=show_progress, resume=resume)
else:
source_snapshot.debug("skipped (target doesnt need it)")
#we may destroy the previous snapshot now, if we don't want it anymore
if prev_source_snapshot and (prev_source_snapshot not in source_keeps):
prev_source_snapshot.destroy()
prev_source_snapshot=source_snapshot
source_snapshot=self.find_our_next_snapshot(source_snapshot)
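# Illustrative walkthrough: if the source has @b-1 @b-2 @b-3 and the target only
# has @b-1, then @b-1 is the common snapshot and @b-2 the start snapshot. Virtual
# target snapshots @b-2 and @b-3 are appended, the thinner decides what each side
# keeps, obsolete snapshots from before @b-1 are destroyed right away, and the
# rest is sent incrementally (@b-1->@b-2, @b-2->@b-3), thinning as we go.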
class ZfsNode(ExecuteNode):
"""a node that contains zfs datasets. implements global (systemwide/pool wide) zfs commands"""
def __init__(self, backup_name, zfs_autobackup, ssh_to=None, readonly=False, description="", debug_output=False, thinner=Thinner()):
self.backup_name=backup_name
if not description:
self.description=ssh_to
else:
self.description=description
self.zfs_autobackup=zfs_autobackup #for logging
rules=thinner.human_rules()
if rules:
for rule in rules:
self.verbose(rule)
else:
self.verbose("Keep no old snaphots")
self.thinner=thinner
ExecuteNode.__init__(self, ssh_to=ssh_to, readonly=readonly, debug_output=debug_output)
def verbose(self,txt):
self.zfs_autobackup.verbose("{} {}".format(self.description, txt))
def error(self,txt,titles=[]):
self.zfs_autobackup.error("{} {}".format(self.description, txt))
def debug(self,txt, titles=[]):
self.zfs_autobackup.debug("{} {}".format(self.description, txt))
def new_snapshotname(self):
"""determine a unique new snapshot name"""
return(self.backup_name+"-"+time.strftime("%Y%m%d%H%M%S"))
def consistent_snapshot(self, datasets, snapshot_name, allow_empty=True):
"""create a consistent (atomic) snapshot of specified datasets.
allow_empty: Allow empty snapshots. (no changes compared to our latest snapshot)
"""
cmd=[ "zfs", "snapshot" ]
noop=True
for dataset in datasets:
if not allow_empty:
if not dataset.is_changed_ours:
dataset.verbose("No changes since {}".format(dataset.our_snapshots[-1].snapshot_name))
continue
snapshot=ZfsDataset(dataset.zfs_node, dataset.name+"@"+snapshot_name)
cmd.append(str(snapshot))
#add snapshot to cache (also useful in testmode)
dataset.snapshots.append(snapshot)
noop=False
if noop:
self.verbose("No changes, not creating snapshot.")
else:
self.verbose("Creating snapshot {}".format(snapshot_name))
self.run(cmd, readonly=False)
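# Illustrative example: for the datasets rpool/data and rpool/vm this builds a
# single atomic command, so all snapshots share one consistent point in time:
# zfs snapshot rpool/data@offsite1-20191027111641 rpool/vm@offsite1-20191027111641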
@cached_property
def selected_datasets(self):
"""determine filesystems that should be backupped by looking at the special autobackup-property, systemwide
returns: list of ZfsDataset
"""
#get all source filesystems that have the backup property
lines=self.run(tab_split=True, readonly=True, cmd=[
"zfs", "get", "-t", "volume,filesystem", "-o", "name,value,source", "-s", "local,inherited", "-H", "autobackup:"+self.backup_name
])
#determine which filesystems should actually be backed up
selected_filesystems=[]
direct_filesystems=[]
for line in lines:
(name,value,source)=line
dataset=ZfsDataset(self, name)
if value=="false":
dataset.verbose("Ignored (disabled)")
else:
if source=="local" and ( value=="true" or value=="child"):
direct_filesystems.append(name)
if source=="local" and value=="true":
dataset.verbose("Selected (direct selection)")
selected_filesystems.append(dataset)
elif source.find("inherited from ")==0 and (value=="true" or value=="child"):
inherited_from=re.sub("^inherited from ", "", source)
if inherited_from in direct_filesystems:
selected_filesystems.append(dataset)
dataset.verbose("Selected (inherited selection)")
else:
dataset.verbose("Ignored (already a backup)")
else:
dataset.verbose("Ignored (only childs)")
return(selected_filesystems)
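# Illustrative example of how the autobackup property drives selection:
# zfs set autobackup:offsite1=true rpool/data #selects rpool/data and everything below it
# zfs set autobackup:offsite1=child rpool #selects only the children of rpool, not rpool itself
# zfs set autobackup:offsite1=false rpool/data/tmp #excludes this child again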
class ZfsAutobackup:
"""main class"""
def __init__(self):
parser = argparse.ArgumentParser(
description='ZFS autobackup v3.0',
epilog='When a filesystem fails, zfs_autobackup will continue and report the number of failures at the end. The exit code will also indicate the number of failures.')
parser.add_argument('--ssh-source', default=None, help='Source host to get backup from. (user@hostname) Default %(default)s.')
parser.add_argument('--ssh-target', default=None, help='Target host to push backup to. (user@hostname) Default %(default)s.')
parser.add_argument('--keep-source', type=str, default="10,1d1w,1w1m,1m1y", help='Thinning schedule for old source snapshots. Default: %(default)s')
parser.add_argument('--keep-target', type=str, default="10,1d1w,1w1m,1m1y", help='Thinning schedule for old target snapshots. Default: %(default)s')
parser.add_argument('backup_name', help='Name of the backup (you should set the zfs property "autobackup:backup-name" to true on filesystems you want to backup)')
parser.add_argument('target_path', help='Target ZFS filesystem')
parser.add_argument('--no-snapshot', action='store_true', help="Don't create a new snapshot. (useful for finishing uncompleted backups, or cleanups)")
parser.add_argument('--no-send', action='store_true', help="Don't send snapshots. (useful to only do a cleanup)")
parser.add_argument('--allow-empty', action='store_true', help='If nothing has changed, still create empty snapshots.')
parser.add_argument('--ignore-replicated', action='store_true', help='Ignore datasets that seem to be replicated some other way. (No changes since the latest snapshot. Useful for Proxmox HA replication)')
parser.add_argument('--no-holds', action='store_true', help="Don't hold snapshots on the source. (useful to allow Proxmox HA replication to switch nodes)")
parser.add_argument('--ignore-new', action='store_true', help='Ignore filesystem if there are already newer snapshots for it on the target. (use with caution)')
parser.add_argument('--resume', action='store_true', help='Support resuming of interrupted transfers by using the zfs extensible_dataset feature (both zpools should have it enabled). Disadvantage is that you need to use zfs recv -A if another snapshot is created on the target during a receive. Otherwise it will keep failing.')
parser.add_argument('--strip-path', default=0, type=int, help='Number of directories to strip from the path. (use 1 when cloning zones between 2 SmartOS machines)')
parser.add_argument('--buffer', default="", help='Use mbuffer with the specified size to speed up the zfs transfer. (e.g. --buffer 1G) Will also show nice progress output.')
# parser.add_argument('--destroy-stale', action='store_true', help='Destroy stale backups that have no more snapshots. Be sure to verify the output before using this! ')
parser.add_argument('--properties', default=None, help='Comma-separated list of zfs properties that should be synced to the target. (Quotas are always disabled temporarily)')
parser.add_argument('--rollback', action='store_true', help='Rollback changes on the target before starting a backup. (normally you can prevent changes by setting the readonly property on the target_path to on)')
parser.add_argument('--ignore-transfer-errors', action='store_true', help='Ignore transfer errors. (still checks if the received filesystem exists. useful for acltype errors)')
parser.add_argument('--test', action='store_true', help="Don't change anything, just show what would be done. (still does all read-only operations)")
parser.add_argument('--verbose', action='store_true', help='verbose output')
parser.add_argument('--debug', action='store_true', help='Show zfs commands that are executed.')
parser.add_argument('--debug-output', action='store_true', help='Show zfs commands and their output/exit codes. (noisy)')
parser.add_argument('--progress', action='store_true', help='show zfs progress output (to stderr)')
#note: args is the only global variable we use, since it's a global readonly setting anyway
args = parser.parse_args()
self.args=args
if args.debug_output:
args.debug=True
self.log=Log(show_debug=self.args.debug, show_verbose=self.args.verbose)
def verbose(self,txt,titles=[]):
self.log.verbose(txt)
def error(self,txt,titles=[]):
self.log.error(txt)
def debug(self,txt, titles=[]):
self.log.debug(txt)
def set_title(self, title):
self.log.verbose("")
self.log.verbose("#### "+title)
def run(self):
self.set_title("Snapshot schedule")
description="[Source]"
source_thinner=Thinner(self.args.keep_source)
source_node=ZfsNode(self.args.backup_name, self, ssh_to=self.args.ssh_source, readonly=self.args.test, debug_output=self.args.debug_output, description=description, thinner=source_thinner)
description="[Target]"
target_thinner=Thinner(self.args.keep_target)
target_node=ZfsNode(self.args.backup_name, self, ssh_to=self.args.ssh_target, readonly=self.args.test, debug_output=self.args.debug_output, description=description, thinner=target_thinner)
# target_node.run(["/root/outputtest"], readonly=True)
self.set_title("Selecting")
source_datasets=source_node.selected_datasets
if not source_datasets:
abort("No source filesystems selected, please do a 'zfs set autobackup:{0}=true' on {1}".format(self.args.backup_name, self.args.ssh_source))
if not self.args.no_snapshot:
self.set_title("Snapshotting")
source_node.consistent_snapshot(source_datasets, source_node.new_snapshotname(), allow_empty=self.args.allow_empty)
self.set_title("Transferring")
for source_dataset in source_datasets:
try:
if self.args.ignore_replicated and not source_dataset.is_changed():
source_dataset.verbose("Already replicated")
else:
#determine corresponding target_dataset
target_name=self.args.target_path + "/" + source_dataset.lstrip_path(self.args.strip_path)
target_dataset=ZfsDataset(target_node, target_name)
#ensure parents exists
if not target_dataset.parent.exists:
target_dataset.parent.create_filesystem(parents=True)
source_dataset.sync_snapshots(target_dataset, show_progress=self.args.progress, resume=self.args.resume)
except Exception as e:
source_dataset.error(str(e))
if self.args.debug:
raise
zfs_autobackup=ZfsAutobackup()
zfs_autobackup.run()
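# Illustrative invocation (host and dataset names are made up):
# on the source: zfs set autobackup:offsite1=true rpool
# then run: ./zfs_autobackup --ssh-source root@fileserver offsite1 backup/fileserver --verbose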