mirror of
https://github.com/psy0rz/zfs_autobackup.git
synced 2025-04-11 22:40:01 +03:00
zfs_autobackup 2.4: try to continue on non-fatal errors
This commit is contained in:
parent
c176b968a9
commit
54235f455a
@ -18,7 +18,7 @@ It has the following features:
|
||||
* Supports resuming of interrupted transfers. (via the zfs extensible_dataset feature)
|
||||
* Backups and snapshots can be named to prevent conflicts. (multiple backups from and to the same filesystems are no problem)
|
||||
* Always creates a new snapshot before starting.
|
||||
* Checks everything and aborts on errors.
|
||||
* Checks everything but tries to continue on non-fatal errors when possible. (Reports error-count when done)
|
||||
* Ability to 'finish' aborted backups to see what goes wrong.
|
||||
* Easy to debug and has a test-mode. Actual unix commands are printed.
|
||||
* Keeps latest X snapshots remote and locally. (default 30, configurable)
|
||||
@ -42,7 +42,7 @@ usage: zfs_autobackup [-h] [--ssh-source SSH_SOURCE] [--ssh-target SSH_TARGET]
|
||||
[--debug]
|
||||
backup_name target_path
|
||||
|
||||
ZFS autobackup v2.3
|
||||
ZFS autobackup v2.4
|
||||
|
||||
positional arguments:
|
||||
backup_name Name of the backup (you should set the zfs property
|
||||
@ -108,6 +108,9 @@ optional arguments:
|
||||
(still does all read-only operations)
|
||||
--verbose verbose output
|
||||
--debug debug output (shows commands that are executed)
|
||||
|
||||
When a filesystem fails, zfs_backup will continue and report the number of
|
||||
failures at that end. Also the exit code will indicate the number of failures.
|
||||
```
|
||||
|
||||
Backup example
|
||||
|
244
zfs_autobackup
244
zfs_autobackup
@ -13,18 +13,20 @@ import time
|
||||
def error(txt):
|
||||
print(txt, file=sys.stderr)
|
||||
|
||||
|
||||
|
||||
def verbose(txt):
|
||||
if args.verbose:
|
||||
print(txt)
|
||||
|
||||
|
||||
|
||||
def debug(txt):
|
||||
if args.debug:
|
||||
print(txt)
|
||||
|
||||
#fatal abort execution, exit code 255
|
||||
def abort(txt):
|
||||
error(txt)
|
||||
sys.exit(255)
|
||||
|
||||
|
||||
|
||||
"""run a command. specify ssh user@host to run remotely"""
|
||||
def run(cmd, input=None, ssh_to="local", tab_split=False, valid_exitcodes=[ 0 ], test=False):
|
||||
@ -473,6 +475,15 @@ def zfs_get_unchanged_filesystems(ssh_to, filesystems):
|
||||
|
||||
|
||||
|
||||
#fugly..
|
||||
failures=0
|
||||
#something failed, but we try to continue with the rest
|
||||
def failed(txt):
|
||||
global failures
|
||||
failures=failures+1
|
||||
error("FAILURE: "+txt+"\n")
|
||||
|
||||
|
||||
def zfs_autobackup():
|
||||
|
||||
############## data gathering section
|
||||
@ -490,8 +501,7 @@ def zfs_autobackup():
|
||||
|
||||
#nothing todo
|
||||
if not source_filesystems:
|
||||
error("No source filesystems selected, please do a 'zfs set autobackup:{0}=true' on {1}".format(args.backup_name,args.ssh_source))
|
||||
sys.exit(1)
|
||||
abort("No source filesystems selected, please do a 'zfs set autobackup:{0}=true' on {1}".format(args.backup_name,args.ssh_source))
|
||||
|
||||
if args.ignore_replicated:
|
||||
replicated_filesystems=zfs_get_unchanged_filesystems(args.ssh_source, source_filesystems)
|
||||
@ -540,7 +550,6 @@ def zfs_autobackup():
|
||||
|
||||
|
||||
### get existing source snapshots
|
||||
|
||||
verbose("Getting source snapshot-list from {0}".format(args.ssh_source))
|
||||
source_snapshots=zfs_get_snapshots(args.ssh_source, source_filesystems, args.backup_name)
|
||||
debug("Source snapshots:\n" + str(pprint.pformat(source_snapshots)))
|
||||
@ -588,114 +597,117 @@ def zfs_autobackup():
|
||||
|
||||
#determine which snapshots to send for each filesystem
|
||||
for source_filesystem in source_filesystems:
|
||||
target_filesystem=args.target_path + "/" + lstrip_path(source_filesystem, args.strip_path)
|
||||
try:
|
||||
target_filesystem=args.target_path + "/" + lstrip_path(source_filesystem, args.strip_path)
|
||||
|
||||
if source_filesystem not in source_snapshots:
|
||||
#this happens if you use --no-snapshot and there are new filesystems without snapshots
|
||||
verbose("* Skipping source filesystem {0}, no snapshots found".format(source_filesystem))
|
||||
else:
|
||||
|
||||
#incremental or initial send?
|
||||
if target_filesystem in target_snapshots and target_snapshots[target_filesystem]:
|
||||
#incremental mode, determine what to send and what is obsolete
|
||||
|
||||
#latest succesfully send snapshot, should be common on both source and target
|
||||
latest_target_snapshot=target_snapshots[target_filesystem][-1]
|
||||
|
||||
if latest_target_snapshot not in source_snapshots[source_filesystem]:
|
||||
#cant find latest target anymore. find first common snapshot and inform user
|
||||
error_msg="Cant find latest target snapshot on source, did you destroy/rename it?"
|
||||
error_msg=error_msg+"\nLatest on target : "+target_filesystem+"@"+latest_target_snapshot
|
||||
error_msg=error_msg+"\nMissing on source: "+source_filesystem+"@"+latest_target_snapshot
|
||||
found=False
|
||||
for latest_target_snapshot in reversed(target_snapshots[target_filesystem]):
|
||||
if latest_target_snapshot in source_snapshots[source_filesystem]:
|
||||
error_msg=error_msg+"\nYou could solve this by rolling back to this common snapshot on target: "+target_filesystem+"@"+latest_target_snapshot
|
||||
found=True
|
||||
break
|
||||
if not found:
|
||||
error_msg=error_msg+"\nAlso could not find an earlier common snapshot to rollback to."
|
||||
else:
|
||||
if args.ignore_new:
|
||||
verbose("* Skipping source filesystem {0}, target already has newer snapshots.".format(source_filesystem))
|
||||
continue
|
||||
|
||||
raise(Exception(error_msg))
|
||||
|
||||
#send all new source snapshots that come AFTER the last target snapshot
|
||||
latest_source_index=source_snapshots[source_filesystem].index(latest_target_snapshot)
|
||||
send_snapshots=source_snapshots[source_filesystem][latest_source_index+1:]
|
||||
|
||||
#source snapshots that come BEFORE last target snapshot are obsolete
|
||||
source_obsolete_snapshots[source_filesystem]=source_snapshots[source_filesystem][0:latest_source_index]
|
||||
|
||||
#target snapshots that come BEFORE last target snapshot are obsolete
|
||||
latest_target_index=target_snapshots[target_filesystem].index(latest_target_snapshot)
|
||||
target_obsolete_snapshots[target_filesystem]=target_snapshots[target_filesystem][0:latest_target_index]
|
||||
if source_filesystem not in source_snapshots:
|
||||
#this happens if you use --no-snapshot and there are new filesystems without snapshots
|
||||
verbose("* Skipping source filesystem {0}, no snapshots found".format(source_filesystem))
|
||||
else:
|
||||
#initial mode, send all snapshots, nothing is obsolete:
|
||||
latest_target_snapshot=None
|
||||
send_snapshots=source_snapshots[source_filesystem]
|
||||
target_obsolete_snapshots[target_filesystem]=[]
|
||||
source_obsolete_snapshots[source_filesystem]=[]
|
||||
|
||||
#now actually send the snapshots
|
||||
if not args.no_send:
|
||||
#incremental or initial send?
|
||||
if target_filesystem in target_snapshots and target_snapshots[target_filesystem]:
|
||||
#incremental mode, determine what to send and what is obsolete
|
||||
|
||||
if send_snapshots and args.rollback and latest_target_snapshot:
|
||||
#roll back any changes on target
|
||||
debug("Rolling back target to latest snapshot.")
|
||||
run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "rollback", target_filesystem+"@"+latest_target_snapshot ])
|
||||
#latest succesfully send snapshot, should be common on both source and target
|
||||
latest_target_snapshot=target_snapshots[target_filesystem][-1]
|
||||
|
||||
if latest_target_snapshot not in source_snapshots[source_filesystem]:
|
||||
#cant find latest target anymore. find first common snapshot and inform user
|
||||
error_msg="Cant find latest target snapshot on source for '{}', did you destroy/rename it?".format(source_filesystem)
|
||||
error_msg=error_msg+"\nLatest on target : "+target_filesystem+"@"+latest_target_snapshot
|
||||
error_msg=error_msg+"\nMissing on source: "+source_filesystem+"@"+latest_target_snapshot
|
||||
found=False
|
||||
for latest_target_snapshot in reversed(target_snapshots[target_filesystem]):
|
||||
if latest_target_snapshot in source_snapshots[source_filesystem]:
|
||||
error_msg=error_msg+"\nYou could solve this by rolling back to this common snapshot on target: "+target_filesystem+"@"+latest_target_snapshot
|
||||
found=True
|
||||
break
|
||||
if not found:
|
||||
error_msg=error_msg+"\nAlso could not find an earlier common snapshot to rollback to."
|
||||
else:
|
||||
if args.ignore_new:
|
||||
verbose("* Skipping source filesystem '{0}', target already has newer snapshots.".format(source_filesystem))
|
||||
continue
|
||||
|
||||
raise(Exception(error_msg))
|
||||
|
||||
#send all new source snapshots that come AFTER the last target snapshot
|
||||
latest_source_index=source_snapshots[source_filesystem].index(latest_target_snapshot)
|
||||
send_snapshots=source_snapshots[source_filesystem][latest_source_index+1:]
|
||||
|
||||
#source snapshots that come BEFORE last target snapshot are obsolete
|
||||
source_obsolete_snapshots[source_filesystem]=source_snapshots[source_filesystem][0:latest_source_index]
|
||||
|
||||
#target snapshots that come BEFORE last target snapshot are obsolete
|
||||
latest_target_index=target_snapshots[target_filesystem].index(latest_target_snapshot)
|
||||
target_obsolete_snapshots[target_filesystem]=target_snapshots[target_filesystem][0:latest_target_index]
|
||||
else:
|
||||
#initial mode, send all snapshots, nothing is obsolete:
|
||||
latest_target_snapshot=None
|
||||
send_snapshots=source_snapshots[source_filesystem]
|
||||
target_obsolete_snapshots[target_filesystem]=[]
|
||||
source_obsolete_snapshots[source_filesystem]=[]
|
||||
|
||||
#now actually send the snapshots
|
||||
if not args.no_send:
|
||||
|
||||
if send_snapshots and args.rollback and latest_target_snapshot:
|
||||
#roll back any changes on target
|
||||
debug("Rolling back target to latest snapshot.")
|
||||
run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "rollback", target_filesystem+"@"+latest_target_snapshot ])
|
||||
|
||||
|
||||
for send_snapshot in send_snapshots:
|
||||
for send_snapshot in send_snapshots:
|
||||
|
||||
#resumable?
|
||||
if target_filesystem in resumable_target_filesystems:
|
||||
resume_token=resumable_target_filesystems.pop(target_filesystem)
|
||||
else:
|
||||
resume_token=None
|
||||
|
||||
#hold the snapshot we're sending on the source
|
||||
if not args.no_holds:
|
||||
zfs_hold_snapshot(ssh_to=args.ssh_source, snapshot=source_filesystem+"@"+send_snapshot)
|
||||
|
||||
zfs_transfer(
|
||||
ssh_source=args.ssh_source, source_filesystem=source_filesystem,
|
||||
first_snapshot=latest_target_snapshot, second_snapshot=send_snapshot,
|
||||
ssh_target=args.ssh_target, target_filesystem=target_filesystem,
|
||||
resume_token=resume_token
|
||||
)
|
||||
|
||||
#hold the snapshot we just send to the target
|
||||
zfs_hold_snapshot(ssh_to=args.ssh_target, snapshot=target_filesystem+"@"+send_snapshot)
|
||||
|
||||
|
||||
|
||||
#now that we succesfully transferred this snapshot, the previous snapshot is obsolete:
|
||||
if latest_target_snapshot:
|
||||
zfs_release_snapshot(ssh_to=args.ssh_target, snapshot=target_filesystem+"@"+latest_target_snapshot)
|
||||
target_obsolete_snapshots[target_filesystem].append(latest_target_snapshot)
|
||||
#resumable?
|
||||
if target_filesystem in resumable_target_filesystems:
|
||||
resume_token=resumable_target_filesystems.pop(target_filesystem)
|
||||
else:
|
||||
resume_token=None
|
||||
|
||||
#hold the snapshot we're sending on the source
|
||||
if not args.no_holds:
|
||||
zfs_release_snapshot(ssh_to=args.ssh_source, snapshot=source_filesystem+"@"+latest_target_snapshot)
|
||||
source_obsolete_snapshots[source_filesystem].append(latest_target_snapshot)
|
||||
#we just received a new filesytem?
|
||||
else:
|
||||
if args.clear_refreservation:
|
||||
debug("Clearing refreservation to save space.")
|
||||
zfs_hold_snapshot(ssh_to=args.ssh_source, snapshot=source_filesystem+"@"+send_snapshot)
|
||||
|
||||
run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "set", "refreservation=none", target_filesystem ])
|
||||
zfs_transfer(
|
||||
ssh_source=args.ssh_source, source_filesystem=source_filesystem,
|
||||
first_snapshot=latest_target_snapshot, second_snapshot=send_snapshot,
|
||||
ssh_target=args.ssh_target, target_filesystem=target_filesystem,
|
||||
resume_token=resume_token
|
||||
)
|
||||
|
||||
#hold the snapshot we just send to the target
|
||||
zfs_hold_snapshot(ssh_to=args.ssh_target, snapshot=target_filesystem+"@"+send_snapshot)
|
||||
|
||||
|
||||
if args.clear_mountpoint:
|
||||
debug("Setting canmount=noauto to prevent auto-mounting in the wrong place. (ignoring errors)")
|
||||
|
||||
run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "set", "canmount=noauto", target_filesystem ], valid_exitcodes= [0, 1] )
|
||||
#now that we succesfully transferred this snapshot, the previous snapshot is obsolete:
|
||||
if latest_target_snapshot:
|
||||
zfs_release_snapshot(ssh_to=args.ssh_target, snapshot=target_filesystem+"@"+latest_target_snapshot)
|
||||
target_obsolete_snapshots[target_filesystem].append(latest_target_snapshot)
|
||||
|
||||
if not args.no_holds:
|
||||
zfs_release_snapshot(ssh_to=args.ssh_source, snapshot=source_filesystem+"@"+latest_target_snapshot)
|
||||
source_obsolete_snapshots[source_filesystem].append(latest_target_snapshot)
|
||||
#we just received a new filesytem?
|
||||
else:
|
||||
if args.clear_refreservation:
|
||||
debug("Clearing refreservation to save space.")
|
||||
|
||||
run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "set", "refreservation=none", target_filesystem ])
|
||||
|
||||
|
||||
latest_target_snapshot=send_snapshot
|
||||
if args.clear_mountpoint:
|
||||
debug("Setting canmount=noauto to prevent auto-mounting in the wrong place. (ignoring errors)")
|
||||
|
||||
run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "set", "canmount=noauto", target_filesystem ], valid_exitcodes= [0, 1] )
|
||||
|
||||
|
||||
latest_target_snapshot=send_snapshot
|
||||
# failed, skip this source_filesystem
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
|
||||
|
||||
############## cleanup section
|
||||
@ -730,23 +742,28 @@ def zfs_autobackup():
|
||||
source_destroys=determine_destroy_list(source_obsolete_snapshots, args.keep_source)
|
||||
if source_destroys:
|
||||
verbose("Destroying old snapshots on source {0}:\n{1}".format(args.ssh_source, "\n".join(source_destroys)))
|
||||
zfs_destroy_snapshots(ssh_to=args.ssh_source, snapshots=source_destroys)
|
||||
try:
|
||||
zfs_destroy_snapshots(ssh_to=args.ssh_source, snapshots=source_destroys)
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
|
||||
|
||||
target_destroys=determine_destroy_list(target_obsolete_snapshots, args.keep_target)
|
||||
if target_destroys:
|
||||
verbose("Destroying old snapshots on target {0}:\n{1}".format(args.ssh_target, "\n".join(target_destroys)))
|
||||
zfs_destroy_snapshots(ssh_to=args.ssh_target, snapshots=target_destroys)
|
||||
|
||||
|
||||
verbose("All done")
|
||||
|
||||
try:
|
||||
zfs_destroy_snapshots(ssh_to=args.ssh_target, snapshots=target_destroys)
|
||||
except Exception as e:
|
||||
failed(str(e))
|
||||
|
||||
|
||||
################################################################## ENTRY POINT
|
||||
|
||||
# parse arguments
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description='ZFS autobackup v2.3')
|
||||
parser = argparse.ArgumentParser(
|
||||
description='ZFS autobackup v2.4',
|
||||
epilog='When a filesystem fails, zfs_backup will continue and report the number of failures at that end. Also the exit code will indicate the number of failures.')
|
||||
parser.add_argument('--ssh-source', default="local", help='Source host to get backup from. (user@hostname) Default %(default)s.')
|
||||
parser.add_argument('--ssh-target', default="local", help='Target host to push backup to. (user@hostname) Default %(default)s.')
|
||||
parser.add_argument('--keep-source', type=int, default=30, help='Number of days to keep old snapshots on source. Default %(default)s.')
|
||||
@ -782,17 +799,22 @@ parser.add_argument('--debug', action='store_true', help='debug output (shows co
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.ignore_replicated and args.allow_empty:
|
||||
print("Cannot use allow_empty with ignore_replicated.")
|
||||
sys.exit(1)
|
||||
abort("Cannot use allow_empty with ignore_replicated.")
|
||||
|
||||
|
||||
try:
|
||||
zfs_autobackup()
|
||||
if not failures:
|
||||
verbose("All operations completed succesfully.")
|
||||
sys.exit(0)
|
||||
else:
|
||||
verbose("{} OPERATION(S) FAILED!".format(failures))
|
||||
#exit with the number of failures.
|
||||
#use the 'failures' counter here: 'failed' is the function that increments it, not a number. (capped at 255)
sys.exit(min(255,failures))
|
||||
|
||||
except Exception as e:
|
||||
if args.debug:
|
||||
raise
|
||||
else:
|
||||
print("ABORTED")
|
||||
print(str(e))
|
||||
sys.exit(1)
|
||||
|
||||
abort("FATAL ERROR")
|
||||
|
Loading…
x
Reference in New Issue
Block a user