From 973479f853c8177c4aa3620005cadec1ab18d62c Mon Sep 17 00:00:00 2001 From: Edwin Eefting Date: Wed, 25 Nov 2015 11:34:05 +0100 Subject: [PATCH] smarter more flexible cleanup of old snapshots --- zfs_autobackup | 110 +++++++++++++++++++++++++++++-------------------- 1 file changed, 66 insertions(+), 44 deletions(-) diff --git a/zfs_autobackup b/zfs_autobackup index 4755a8f..7727f40 100755 --- a/zfs_autobackup +++ b/zfs_autobackup @@ -17,12 +17,14 @@ parser = argparse.ArgumentParser(description='ZFS autobackup v2.0') parser.add_argument('--ssh-source', default="local", help='Source host to get backup from. (user@hostname) Default %(default)s.') parser.add_argument('--ssh-target', default="local", help='Target host to push backup to. (user@hostname) Default %(default)s.') parser.add_argument('--ssh-cipher', default="arcfour128", help='SSH cipher to use (default %(default)s)') -parser.add_argument('--keep-source', type=int, default=30, help='Number of old snapshots to keep on source. Default %(default)s.') -parser.add_argument('--keep-target', type=int, default=30, help='Number of old snapshots to keep on target. Default %(default)s.') +parser.add_argument('--keep-source', type=int, default=30, help='Number of days to keep old snapshots on source. Default %(default)s.') +parser.add_argument('--keep-target', type=int, default=30, help='Number of days to keep old snapshots on target. 
Default %(default)s.') parser.add_argument('backup_name', help='Name of the backup (you should set the zfs property "autobackup:backup-name" to true on filesystems you want to backup') parser.add_argument('target_fs', help='Target filesystem') -parser.add_argument('--finish', action='store_true', help='dont create new snapshot, just finish sending current snapshots') +parser.add_argument('--no-snapshot', action='store_true', help='dont create new snapshot (usefull for finishing uncompleted backups, or cleanups)') +parser.add_argument('--no-send', action='store_true', help='dont send snapshots (usefull to only do a cleanup)') + parser.add_argument('--compress', action='store_true', help='use compression during zfs send/recv') parser.add_argument('--test', action='store_true', help='dont change anything, just show what would be done (still does all read-only operations)') parser.add_argument('--verbose', action='store_true', help='verbose output') @@ -140,10 +142,10 @@ def zfs_get_backupped_filesystems(ssh_to, backup_name, target_fs): -"""destroy list of filesystems or snapshots""" +"""destroy list of filesystems or snapshots (in @format) """ def zfs_destroy(ssh_to, filesystems): - debug("Destroying on {0}:\n{1}".format(ssh_to, "\n".join(filesystems))) + # debug("Destroying on {0}:\n{1}".format(ssh_to, "\n".join(filesystems))) #zfs can only destroy one filesystem at once so we use xargs and stdin run(ssh_to=ssh_to, test=args.test, input="\0".join(filesystems), cmd= [ "xargs", "-0", "-n", "1", "zfs", "destroy", "-d" ] @@ -300,9 +302,6 @@ if args.test: args.verbose=True verbose("RUNNING IN TEST-MODE, NOT MAKING ACTUAL BACKUP!") -if args.keep_source<1 or args.keep_target<1: - raise(Exception("Minimum number of snapshots to keep is 1")) - #get selected filesystem on backup source verbose("Getting selected source filesystems for backup {0} on {1}".format(args.backup_name,args.ssh_source)) source_filesystems=zfs_get_selected_filesystems(args.ssh_source, args.backup_name) 
@@ -319,7 +318,7 @@ for source_filesystem in source_filesystems: target_filesystems.append(args.target_fs+"/"+source_filesystem) #create new snapshot? -if not args.finish: +if not args.no_snapshot: new_snapshot_name=args.backup_name+"-"+time.strftime("%Y%m%d%H%M%S") verbose("Creating source snapshot {0} on {1} ".format(new_snapshot_name, args.ssh_source)) zfs_create_snapshot(args.ssh_source, source_filesystems, new_snapshot_name) @@ -339,66 +338,89 @@ except subprocess.CalledProcessError: debug("Target snapshots: " + str(pprint.pformat(target_snapshots))) +#obsolete snapshots that may be removed +source_obsolete_snapshots={} +target_obsolete_snapshots={} + #determine which snapshots to send for each filesystem for source_filesystem in source_filesystems: target_filesystem=args.target_fs+"/"+source_filesystem if source_filesystem not in source_snapshots: - #this happens if you use --finish and there are new filesystems without snapshots + #this happens if you use --no-snapshot and there are new filesystems without snapshots verbose("Skipping source filesystem {0}, no snapshots found".format(source_filesystem)) else: - send_snapshots=source_snapshots[source_filesystem][:] - - #remove snapshots that are already on target from send-list + #incremental or initial send? if target_filesystem in target_snapshots and target_snapshots[target_filesystem]: + #incremental mode, determine what to send and what is obsolete + + #latest succesfully send snapshot, should be common on both source and target latest_target_snapshot=target_snapshots[target_filesystem][-1] - if latest_target_snapshot not in send_snapshots: + if latest_target_snapshot not in source_snapshots[source_filesystem]: raise(Exception("Cant find latest target snapshot on source, did you destroy it accidently? 
"+source_filesystem+"@"+latest_target_snapshot)) - while latest_target_snapshot in send_snapshots: - send_snapshots.pop(0) + #send all new source snapshots that come AFTER the last target snapshot + latest_source_index=source_snapshots[source_filesystem].index(latest_target_snapshot) + send_snapshots=source_snapshots[source_filesystem][latest_source_index+1:] + + #source snapshots that come BEFORE last target snapshot are obsolete + source_obsolete_snapshots[source_filesystem]=source_snapshots[source_filesystem][0:latest_source_index] + + #target snapshots that come BEFORE last target snapshot are obsolete + latest_target_index=target_snapshots[target_filesystem].index(latest_target_snapshot) + target_obsolete_snapshots[target_filesystem]=target_snapshots[target_filesystem][0:latest_target_index] else: + #initial mode, send all snapshots, nothing is obsolete: latest_target_snapshot=None + send_snapshots=source_snapshots[source_filesystem] + target_obsolete_snapshots[target_filesystem]=[] + source_obsolete_snapshots[source_filesystem]=[] #now actually send the snapshots - for send_snapshot in send_snapshots: - zfs_transfer( - ssh_source=args.ssh_source, source_filesystem=source_filesystem, first_snapshot=latest_target_snapshot, second_snapshot=send_snapshot, - ssh_target=args.ssh_target, target_filesystem=target_filesystem) + if not args.no_send: + for send_snapshot in send_snapshots: + zfs_transfer( + ssh_source=args.ssh_source, source_filesystem=source_filesystem, first_snapshot=latest_target_snapshot, second_snapshot=send_snapshot, + ssh_target=args.ssh_target, target_filesystem=target_filesystem) - #update target_snapshot list for later cleanup - if not target_filesystem in target_snapshots: - target_snapshots[target_filesystem]=[] - target_snapshots[target_filesystem].append(send_snapshot) + #now that we succesfully transferred this snapshot, the previous snapshot is obsolete (an initial send has no previous snapshot, so there is nothing to mark): + if 
latest_target_snapshot is not None: + target_obsolete_snapshots[target_filesystem].append(latest_target_snapshot) + 
source_obsolete_snapshots[source_filesystem].append(latest_target_snapshot) - latest_target_snapshot=send_snapshot + latest_target_snapshot=send_snapshot + +#we only do cleanups after everything is complete, to keep everything consistent (same snapshots everywhere) + +#get list of snapshot (in @format) to destroy +now=time.time() +def get_destroy_list(snapshots, days): + ret=[] + for filesystem in snapshots: + for snapshot in snapshots[filesystem]: + time_str=re.findall("^.*-([0-9]*)$", snapshot)[0] + if len(time_str)==14: + #new format: + time_secs=time.mktime(time.strptime(time_str,"%Y%m%d%H%M%S")) + else: + time_secs=int(time_str) + # verbose("time_secs"+time_str) + if (now-time_secs) > (24 * 3600 * days): + ret.append(filesystem+"@"+snapshot) + + return(ret) -#cleanup old source snapshots -source_destroys=[] -for source_filesystem in source_snapshots: - destroy_count=len(source_snapshots[source_filesystem])-args.keep_source - if destroy_count>0: - for snapshot in source_snapshots[source_filesystem][0:destroy_count-1]: - source_destroys.append(source_filesystem+"@"+snapshot) - +source_destroys=get_destroy_list(source_obsolete_snapshots, args.keep_source) if source_destroys: - verbose("Destroying old snapshots on source") + verbose("Destroying old snapshots on source {0}:\n{1}".format(args.ssh_source, "\n".join(source_destroys))) zfs_destroy(ssh_to=args.ssh_source, filesystems=source_destroys) - -#cleanup old target snapshots -target_destroys=[] -for target_filesystem in target_snapshots: - destroy_count=len(target_snapshots[target_filesystem])-args.keep_target - if destroy_count>0: - for snapshot in target_snapshots[target_filesystem][0:destroy_count-1]: - target_destroys.append(target_filesystem+"@"+snapshot) - +target_destroys=get_destroy_list(target_obsolete_snapshots, args.keep_target) if target_destroys: - verbose("Destroying old snapshots on target") + verbose("Destroying old snapshots on target {0}:\n{1}".format(args.ssh_target, 
"\n".join(target_destroys))) zfs_destroy(ssh_to=args.ssh_target, filesystems=target_destroys)