diff --git a/README.md b/README.md index b12f0b7..d88a4cf 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ It has the following features: * Supports resuming of interrupted transfers. (via the zfs extensible_dataset feature) * Backups and snapshots can be named to prevent conflicts. (multiple backups from and to the same filesystems are no problem) * Always creates a new snapshot before starting. -* Checks everything and aborts on errors. +* Checks everything but tries to continue on non-fatal errors when possible. (Reports error-count when done) * Ability to 'finish' aborted backups to see what goes wrong. * Easy to debug and has a test-mode. Actual unix commands are printed. * Keeps latest X snapshots remote and locally. (default 30, configurable) @@ -42,7 +42,7 @@ usage: zfs_autobackup [-h] [--ssh-source SSH_SOURCE] [--ssh-target SSH_TARGET] [--debug] backup_name target_path -ZFS autobackup v2.3 +ZFS autobackup v2.4 positional arguments: backup_name Name of the backup (you should set the zfs property @@ -108,6 +108,9 @@ optional arguments: (still does all read-only operations) --verbose verbose output --debug debug output (shows commands that are executed) + +When a filesystem fails, zfs_autobackup will continue and report the number of +failures at the end. Also the exit code will indicate the number of failures. ``` Backup example diff --git a/zfs_autobackup b/zfs_autobackup index 550e2a5..61397ed 100755 --- a/zfs_autobackup +++ b/zfs_autobackup @@ -13,18 +13,20 @@ import time def error(txt): print(txt, file=sys.stderr) - - def verbose(txt): if args.verbose: print(txt) - - def debug(txt): if args.debug: print(txt) +#fatal abort execution, exit code 255 +def abort(txt): + error(txt) + sys.exit(255) + + """run a command. specifiy ssh user@host to run remotely""" def run(cmd, input=None, ssh_to="local", tab_split=False, valid_exitcodes=[ 0 ], test=False): @@ -473,6 +475,15 @@ def zfs_get_unchanged_filesystems(ssh_to, filesystems): +#fugly.. 
+failures=0 +#non-fatal failure: increments the global failures counter and reports txt (prefixed with "FAILURE: ") through error(); it returns normally so the caller can continue with the rest +def failed(txt): + global failures + failures=failures+1 + error("FAILURE: "+txt+"\n") + + def zfs_autobackup(): ############## data gathering section @@ -490,8 +501,7 @@ def zfs_autobackup(): #nothing todo if not source_filesystems: - error("No source filesystems selected, please do a 'zfs set autobackup:{0}=true' on {1}".format(args.backup_name,args.ssh_source)) - sys.exit(1) + abort("No source filesystems selected, please do a 'zfs set autobackup:{0}=true' on {1}".format(args.backup_name,args.ssh_source)) if args.ignore_replicated: replicated_filesystems=zfs_get_unchanged_filesystems(args.ssh_source, source_filesystems) @@ -540,7 +550,6 @@ def zfs_autobackup(): ### get eixsting source snapshots - verbose("Getting source snapshot-list from {0}".format(args.ssh_source)) source_snapshots=zfs_get_snapshots(args.ssh_source, source_filesystems, args.backup_name) debug("Source snapshots:\n" + str(pprint.pformat(source_snapshots))) @@ -588,114 +597,117 @@ def zfs_autobackup(): #determine which snapshots to send for each filesystem for source_filesystem in source_filesystems: - target_filesystem=args.target_path + "/" + lstrip_path(source_filesystem, args.strip_path) + try: + target_filesystem=args.target_path + "/" + lstrip_path(source_filesystem, args.strip_path) - if source_filesystem not in source_snapshots: - #this happens if you use --no-snapshot and there are new filesystems without snapshots - verbose("* Skipping source filesystem {0}, no snapshots found".format(source_filesystem)) - else: - - #incremental or initial send? 
- if target_filesystem in target_snapshots and target_snapshots[target_filesystem]: - #incremental mode, determine what to send and what is obsolete - - #latest succesfully send snapshot, should be common on both source and target - latest_target_snapshot=target_snapshots[target_filesystem][-1] - - if latest_target_snapshot not in source_snapshots[source_filesystem]: - #cant find latest target anymore. find first common snapshot and inform user - error_msg="Cant find latest target snapshot on source, did you destroy/rename it?" - error_msg=error_msg+"\nLatest on target : "+target_filesystem+"@"+latest_target_snapshot - error_msg=error_msg+"\nMissing on source: "+source_filesystem+"@"+latest_target_snapshot - found=False - for latest_target_snapshot in reversed(target_snapshots[target_filesystem]): - if latest_target_snapshot in source_snapshots[source_filesystem]: - error_msg=error_msg+"\nYou could solve this by rolling back to this common snapshot on target: "+target_filesystem+"@"+latest_target_snapshot - found=True - break - if not found: - error_msg=error_msg+"\nAlso could not find an earlier common snapshot to rollback to." 
- else: - if args.ignore_new: - verbose("* Skipping source filesystem {0}, target already has newer snapshots.".format(source_filesystem)) - continue - - raise(Exception(error_msg)) - - #send all new source snapshots that come AFTER the last target snapshot - latest_source_index=source_snapshots[source_filesystem].index(latest_target_snapshot) - send_snapshots=source_snapshots[source_filesystem][latest_source_index+1:] - - #source snapshots that come BEFORE last target snapshot are obsolete - source_obsolete_snapshots[source_filesystem]=source_snapshots[source_filesystem][0:latest_source_index] - - #target snapshots that come BEFORE last target snapshot are obsolete - latest_target_index=target_snapshots[target_filesystem].index(latest_target_snapshot) - target_obsolete_snapshots[target_filesystem]=target_snapshots[target_filesystem][0:latest_target_index] + if source_filesystem not in source_snapshots: + #this happens if you use --no-snapshot and there are new filesystems without snapshots + verbose("* Skipping source filesystem {0}, no snapshots found".format(source_filesystem)) else: - #initial mode, send all snapshots, nothing is obsolete: - latest_target_snapshot=None - send_snapshots=source_snapshots[source_filesystem] - target_obsolete_snapshots[target_filesystem]=[] - source_obsolete_snapshots[source_filesystem]=[] - #now actually send the snapshots - if not args.no_send: + #incremental or initial send? 
+ if target_filesystem in target_snapshots and target_snapshots[target_filesystem]: + #incremental mode, determine what to send and what is obsolete - if send_snapshots and args.rollback and latest_target_snapshot: - #roll back any changes on target - debug("Rolling back target to latest snapshot.") - run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "rollback", target_filesystem+"@"+latest_target_snapshot ]) + #latest succesfully send snapshot, should be common on both source and target + latest_target_snapshot=target_snapshots[target_filesystem][-1] + + if latest_target_snapshot not in source_snapshots[source_filesystem]: + #cant find latest target anymore. find first common snapshot and inform user + error_msg="Cant find latest target snapshot on source for '{}', did you destroy/rename it?".format(source_filesystem) + error_msg=error_msg+"\nLatest on target : "+target_filesystem+"@"+latest_target_snapshot + error_msg=error_msg+"\nMissing on source: "+source_filesystem+"@"+latest_target_snapshot + found=False + for latest_target_snapshot in reversed(target_snapshots[target_filesystem]): + if latest_target_snapshot in source_snapshots[source_filesystem]: + error_msg=error_msg+"\nYou could solve this by rolling back to this common snapshot on target: "+target_filesystem+"@"+latest_target_snapshot + found=True + break + if not found: + error_msg=error_msg+"\nAlso could not find an earlier common snapshot to rollback to." 
+ else: + if args.ignore_new: + verbose("* Skipping source filesystem '{0}', target already has newer snapshots.".format(source_filesystem)) + continue + + raise(Exception(error_msg)) + + #send all new source snapshots that come AFTER the last target snapshot + latest_source_index=source_snapshots[source_filesystem].index(latest_target_snapshot) + send_snapshots=source_snapshots[source_filesystem][latest_source_index+1:] + + #source snapshots that come BEFORE last target snapshot are obsolete + source_obsolete_snapshots[source_filesystem]=source_snapshots[source_filesystem][0:latest_source_index] + + #target snapshots that come BEFORE last target snapshot are obsolete + latest_target_index=target_snapshots[target_filesystem].index(latest_target_snapshot) + target_obsolete_snapshots[target_filesystem]=target_snapshots[target_filesystem][0:latest_target_index] + else: + #initial mode, send all snapshots, nothing is obsolete: + latest_target_snapshot=None + send_snapshots=source_snapshots[source_filesystem] + target_obsolete_snapshots[target_filesystem]=[] + source_obsolete_snapshots[source_filesystem]=[] + + #now actually send the snapshots + if not args.no_send: + + if send_snapshots and args.rollback and latest_target_snapshot: + #roll back any changes on target + debug("Rolling back target to latest snapshot.") + run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "rollback", target_filesystem+"@"+latest_target_snapshot ]) - for send_snapshot in send_snapshots: + for send_snapshot in send_snapshots: - #resumable? 
- if target_filesystem in resumable_target_filesystems: - resume_token=resumable_target_filesystems.pop(target_filesystem) - else: - resume_token=None - - #hold the snapshot we're sending on the source - if not args.no_holds: - zfs_hold_snapshot(ssh_to=args.ssh_source, snapshot=source_filesystem+"@"+send_snapshot) - - zfs_transfer( - ssh_source=args.ssh_source, source_filesystem=source_filesystem, - first_snapshot=latest_target_snapshot, second_snapshot=send_snapshot, - ssh_target=args.ssh_target, target_filesystem=target_filesystem, - resume_token=resume_token - ) - - #hold the snapshot we just send to the target - zfs_hold_snapshot(ssh_to=args.ssh_target, snapshot=target_filesystem+"@"+send_snapshot) - - - - #now that we succesfully transferred this snapshot, the previous snapshot is obsolete: - if latest_target_snapshot: - zfs_release_snapshot(ssh_to=args.ssh_target, snapshot=target_filesystem+"@"+latest_target_snapshot) - target_obsolete_snapshots[target_filesystem].append(latest_target_snapshot) + #resumable? + if target_filesystem in resumable_target_filesystems: + resume_token=resumable_target_filesystems.pop(target_filesystem) + else: + resume_token=None + #hold the snapshot we're sending on the source if not args.no_holds: - zfs_release_snapshot(ssh_to=args.ssh_source, snapshot=source_filesystem+"@"+latest_target_snapshot) - source_obsolete_snapshots[source_filesystem].append(latest_target_snapshot) - #we just received a new filesytem? 
- else: - if args.clear_refreservation: - debug("Clearing refreservation to save space.") + zfs_hold_snapshot(ssh_to=args.ssh_source, snapshot=source_filesystem+"@"+send_snapshot) - run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "set", "refreservation=none", target_filesystem ]) + zfs_transfer( + ssh_source=args.ssh_source, source_filesystem=source_filesystem, + first_snapshot=latest_target_snapshot, second_snapshot=send_snapshot, + ssh_target=args.ssh_target, target_filesystem=target_filesystem, + resume_token=resume_token + ) + + #hold the snapshot we just send to the target + zfs_hold_snapshot(ssh_to=args.ssh_target, snapshot=target_filesystem+"@"+send_snapshot) - if args.clear_mountpoint: - debug("Setting canmount=noauto to prevent auto-mounting in the wrong place. (ignoring errors)") - run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "set", "canmount=noauto", target_filesystem ], valid_exitcodes= [0, 1] ) + #now that we succesfully transferred this snapshot, the previous snapshot is obsolete: + if latest_target_snapshot: + zfs_release_snapshot(ssh_to=args.ssh_target, snapshot=target_filesystem+"@"+latest_target_snapshot) + target_obsolete_snapshots[target_filesystem].append(latest_target_snapshot) + + if not args.no_holds: + zfs_release_snapshot(ssh_to=args.ssh_source, snapshot=source_filesystem+"@"+latest_target_snapshot) + source_obsolete_snapshots[source_filesystem].append(latest_target_snapshot) + #we just received a new filesytem? + else: + if args.clear_refreservation: + debug("Clearing refreservation to save space.") + + run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "set", "refreservation=none", target_filesystem ]) - latest_target_snapshot=send_snapshot + if args.clear_mountpoint: + debug("Setting canmount=noauto to prevent auto-mounting in the wrong place. 
(ignoring errors)") + run(ssh_to=args.ssh_target, test=args.test, cmd=["zfs", "set", "canmount=noauto", target_filesystem ], valid_exitcodes= [0, 1] ) + + + latest_target_snapshot=send_snapshot + # failed: record it via failed() and carry on with the next source_filesystem + except Exception as e: + failed(str(e)) ############## cleanup section @@ -730,23 +742,28 @@ def zfs_autobackup(): source_destroys=determine_destroy_list(source_obsolete_snapshots, args.keep_source) if source_destroys: verbose("Destroying old snapshots on source {0}:\n{1}".format(args.ssh_source, "\n".join(source_destroys))) - zfs_destroy_snapshots(ssh_to=args.ssh_source, snapshots=source_destroys) + try: + zfs_destroy_snapshots(ssh_to=args.ssh_source, snapshots=source_destroys) + except Exception as e: + failed(str(e)) + target_destroys=determine_destroy_list(target_obsolete_snapshots, args.keep_target) if target_destroys: verbose("Destroying old snapshots on target {0}:\n{1}".format(args.ssh_target, "\n".join(target_destroys))) - zfs_destroy_snapshots(ssh_to=args.ssh_target, snapshots=target_destroys) - - - verbose("All done") - + try: + zfs_destroy_snapshots(ssh_to=args.ssh_target, snapshots=target_destroys) + except Exception as e: + failed(str(e)) ################################################################## ENTRY POINT # parse arguments import argparse -parser = argparse.ArgumentParser(description='ZFS autobackup v2.3') +parser = argparse.ArgumentParser( + description='ZFS autobackup v2.4', + epilog='When a filesystem fails, zfs_autobackup will continue and report the number of failures at the end. Also the exit code will indicate the number of failures.') parser.add_argument('--ssh-source', default="local", help='Source host to get backup from. (user@hostname) Default %(default)s.') parser.add_argument('--ssh-target', default="local", help='Target host to push backup to. (user@hostname) Default %(default)s.') parser.add_argument('--keep-source', type=int, default=30, help='Number of days to keep old snapshots on source. 
Default %(default)s.') @@ -782,17 +799,22 @@ parser.add_argument('--debug', action='store_true', help='debug output (shows co args = parser.parse_args() if args.ignore_replicated and args.allow_empty: - print("Cannot use allow_empty with ignore_replicated.") - sys.exit(1) + abort("Cannot use allow_empty with ignore_replicated.") try: zfs_autobackup() + if not failures: + verbose("All operations completed succesfully.") + sys.exit(0) + else: + verbose("{} OPERATION(S) FAILED!".format(failures)) + #exit with the number of failures, capped at 255 (the highest exit status) + sys.exit(min(255,failures)) + except Exception as e: if args.debug: raise else: - print("ABORTED") print(str(e)) - sys.exit(1) - + abort("FATAL ERROR")