Merge branch 'master' of github.com:psy0rz/zfs_autobackup

This commit is contained in:
Edwin Eefting 2020-05-14 17:43:04 +02:00
commit 4e1bfd8cba
7 changed files with 395 additions and 63 deletions

1
.gitignore vendored
View File

@ -6,3 +6,4 @@ build/
zfs_autobackup.egg-info
.eggs/
__pycache__
.coverage

View File

@ -286,7 +286,7 @@ You can specify as many rules as you need. The order of the rules doesn't matter
Keep in mind its up to you to actually run zfs-autobackup often enough: If you want to keep hourly snapshots, you have to make sure you at least run it every hour.
However, its no problem if you run it more or less often than that: The thinner will still do its best to choose an optimal set of snapshots to choose.
However, it's no problem if you run it more or less often than that: The thinner will still keep an optimal set of snapshots to match your schedule as well as possible.
If you want to keep as few snapshots as possible, just specify 0. (`--keep-source=0` for example)

View File

@ -26,7 +26,7 @@ if sys.stdout.isatty():
except ImportError:
pass
VERSION="3.0-rc10"
VERSION="3.0-rc11"
HEADER="zfs-autobackup v{} - Copyright 2020 E.H.Eefting (edwin@datux.nl)\n".format(VERSION)
class Log:
@ -226,53 +226,6 @@ class Thinner:
# ######### Thinner testing code
# now=int(time.time())
#
# t=Thinner("1d1w,1w1m,1m6m,1y2y", always_keep=1)
#
# import random
#
# class Thing:
# def __init__(self, timestamp):
# self.timestamp=timestamp
#
# def __str__(self):
# age=now-self.timestamp
# struct=time.localtime(self.timestamp)
# return("{} ({} days old)".format(time.strftime("%Y-%m-%d %H:%M:%S",struct),int(age/(3600*24))))
#
# def test():
# global now
# things=[]
#
# while True:
# print("#################### {}".format(time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(now))))
#
# (keeps, removes)=t.run(things, now)
#
# print ("### KEEP ")
# for thing in keeps:
# print(thing)
#
# print ("### REMOVE ")
# for thing in removes:
# print(thing)
#
# things=keeps
#
# #increase random amount of time and maybe add a thing
# now=now+random.randint(0,160000)
# if random.random()>=0:
# things.append(Thing(now))
#
# sys.stdin.readline()
#
# test()
class cached_property(object):
""" A property that is only computed once per instance and then replaces
itself with an ordinary attribute. Deleting the attribute resets the
@ -301,10 +254,21 @@ class cached_property(object):
return obj._cached_properties[propname]
class Logger():
    """Minimal console logger: writes one prefixed line to stdout per call.

    Serves as a stand-in for the application's real logging object; anything
    with debug/verbose/error methods can replace it.
    """

    def debug(self, txt):
        # Extra space after DEBUG keeps the prefixes column-aligned.
        print("DEBUG : {}".format(txt))

    def verbose(self, txt):
        print("VERBOSE: {}".format(txt))

    def error(self, txt):
        # Extra space after ERROR keeps the prefixes column-aligned.
        print("ERROR : {}".format(txt))
class ExecuteNode:
class ExecuteNode(Logger):
"""an endpoint to execute local or remote commands via ssh"""
@ -349,11 +313,14 @@ class ExecuteNode:
def run(self, cmd, input=None, tab_split=False, valid_exitcodes=[ 0 ], readonly=False, hide_errors=False, pipe=False, return_stderr=False):
"""run a command on the node
readonly: make this True if the command doesn't make any changes and is safe to execute in testmode
pipe: Instead of executing, return a pipe-handle to be used to input to another run() command. (just like a | in linux)
cmd: the actual command, should be a list, where the first item is the command and the rest are parameters.
input: Can be None, a string or a pipe-handle you got from another run()
return_stderr: return both stdout and stderr as a tuple
tab_split: split tabbed files in output into a list
valid_exitcodes: list of valid exit codes for this command (checks exit code of both sides of a pipe)
readonly: make this True if the command doesn't make any changes and is safe to execute in testmode
hide_errors: don't show stderr output as error, instead show it as debugging output (use to hide expected errors)
pipe: Instead of executing, return a pipe-handle to be used to input to another run() command. (just like a | in linux)
return_stderr: return both stdout and stderr as a tuple. (only returns stderr from this side of the pipe)
"""
encoded_cmd=[]
@ -371,7 +338,9 @@ class ExecuteNode:
#(this is necessary if LC_ALL=en_US.utf8 is not set in the environment)
for arg in cmd:
#add single quotes for remote commands to support spaces and other weird stuff (remote commands are executed in a shell)
encoded_cmd.append( ("'"+arg+"'").encode('utf-8'))
#and escape existing single quotes (bash needs ' to end the quoted string, then a \' for the actual quote and then another ' to start a new quoted string)
#(and then python needs the double \ to get a single \)
encoded_cmd.append( ("'" + arg.replace("'","'\\''") + "'").encode('utf-8'))
else:
for arg in cmd:
@ -414,8 +383,12 @@ class ExecuteNode:
#Note: make streaming?
if isinstance(input,str) or type(input)=='unicode':
p.stdin.write(input)
p.stdin.write(input.encode('utf-8'))
if p.stdin:
p.stdin.close()
#return pipe
if pipe:
return(p)
@ -474,10 +447,11 @@ class ExecuteNode:
if valid_exitcodes and input.returncode not in valid_exitcodes:
raise(subprocess.CalledProcessError(input.returncode, "(pipe)"))
if valid_exitcodes and p.returncode not in valid_exitcodes:
raise(subprocess.CalledProcessError(p.returncode, encoded_cmd))
if return_stderr:
return ( output_lines, error_lines )
else:
@ -1261,14 +1235,14 @@ class ZfsDataset():
class ZfsNode(ExecuteNode):
"""a node that contains zfs datasets. implements global (systemwide/pool wide) zfs commands"""
def __init__(self, backup_name, zfs_autobackup, ssh_config=None, ssh_to=None, readonly=False, description="", debug_output=False, thinner=Thinner()):
def __init__(self, backup_name, logger, ssh_config=None, ssh_to=None, readonly=False, description="", debug_output=False, thinner=Thinner()):
self.backup_name=backup_name
if not description:
if not description and ssh_to:
self.description=ssh_to
else:
self.description=description
self.zfs_autobackup=zfs_autobackup #for logging
self.logger=logger
if ssh_config:
self.verbose("Using custom SSH config: {}".format(ssh_config))
@ -1346,13 +1320,13 @@ class ZfsNode(ExecuteNode):
self.parse_zfs_progress(line, hide_errors, "STDERR > ")
def verbose(self,txt):
self.zfs_autobackup.verbose("{} {}".format(self.description, txt))
self.logger.verbose("{} {}".format(self.description, txt))
def error(self,txt,titles=[]):
self.zfs_autobackup.error("{} {}".format(self.description, txt))
self.logger.error("{} {}".format(self.description, txt))
def debug(self,txt, titles=[]):
self.zfs_autobackup.debug("{} {}".format(self.description, txt))
self.logger.debug("{} {}".format(self.description, txt))
def new_snapshotname(self):
"""determine uniq new snapshotname"""

3
run_tests Executable file
View File

@ -0,0 +1,3 @@
#!/bin/bash
# Run the full unittest suite under coverage measurement, then print the
# line-coverage report for the module under test (bin.zfs_autobackup).
# Note: the report is printed even if tests fail, because of the ';' separator.
coverage run --source bin.zfs_autobackup -m unittest ; coverage report

137
test_executenode.py Normal file
View File

@ -0,0 +1,137 @@
#default test stuff
import unittest
from bin.zfs_autobackup import *
import subprocess
import time
print("THIS TEST REQUIRES SSH TO LOCALHOST")
class TestExecuteNode(unittest.TestCase):
    """Exercises ExecuteNode.run() both locally and over ssh.

    NOTE(review): the *_remote tests assume passwordless ssh to localhost is
    configured (see the module-level warning print) — confirm in CI setup.
    """

    # def setUp(self):
    #     return super().setUp()

    def basics(self, node ):
        """Shared battery of run() checks: output handling, exit codes, quoting, stderr, stdin."""

        with self.subTest("simple echo"):
            self.assertEqual(node.run(["echo","test"]), ["test"])

        with self.subTest("error exit code"):
            # Default valid_exitcodes=[0], so a nonzero exit must raise.
            with self.assertRaises(subprocess.CalledProcessError):
                node.run(["false"])

        #
        with self.subTest("multiline without tabsplit"):
            # Output is split on newlines into a list of lines.
            self.assertEqual(node.run(["echo","l1c1\tl1c2\nl2c1\tl2c2"], tab_split=False), ["l1c1\tl1c2", "l2c1\tl2c2"])

        #multiline tabsplit: each line is further split on tabs into a sublist
        with self.subTest("multiline tabsplit"):
            self.assertEqual(node.run(["echo","l1c1\tl1c2\nl2c1\tl2c2"], tab_split=True), [['l1c1', 'l1c2'], ['l2c1', 'l2c2']])

        #escaping test (shouldn't be a problem locally, single quotes can be a problem remote via ssh)
        with self.subTest("escape test"):
            s="><`'\"@&$()$bla\\/.*!#test _+-={}[]|"
            self.assertEqual(node.run(["echo",s]), [s])

        #return stderr as well, trigger stderr by listing something non existing
        with self.subTest("stderr return"):
            # ls exits with 2 on a missing file, so 2 must be declared valid.
            (stdout, stderr)=node.run(["ls", "nonexistingfile"], return_stderr=True, valid_exitcodes=[2])
            self.assertEqual(stdout,[])
            self.assertRegex(stderr[0],"nonexistingfile")

        #slow command, make sure things dont exit too early
        with self.subTest("early exit test"):
            start_time=time.time()
            self.assertEqual(node.run(["sleep","1"]), [])
            self.assertGreaterEqual(time.time()-start_time,1)

        #input a string and check it via cat
        with self.subTest("stdin input string"):
            self.assertEqual(node.run(["cat"], input="test"), ["test"])

    def test_basics_local(self):
        # Local execution (no ssh_to given).
        node=ExecuteNode(debug_output=True)
        self.basics(node)

    def test_basics_remote(self):
        # Same battery, but executed through ssh.
        node=ExecuteNode(ssh_to="localhost", debug_output=True)
        self.basics(node)

    ################

    def test_readonly(self):
        # On a readonly node only commands flagged readonly=True actually run;
        # others are skipped and run() returns None.
        node=ExecuteNode(debug_output=True, readonly=True)

        self.assertEqual(node.run(["echo","test"], readonly=False), None)
        self.assertEqual(node.run(["echo","test"], readonly=True), ["test"])

    ################

    def pipe(self, nodea, nodeb):
        """Shared checks for piping run(pipe=True) output from nodea into nodeb."""

        with self.subTest("pipe data"):
            # 1000 blocks of zeros from dd, checksummed on the receiving side.
            output=nodea.run(["dd", "if=/dev/zero", "count=1000"], pipe=True)
            self.assertEqual(nodeb.run(["md5sum"], input=output), ["816df6f64deba63b029ca19d880ee10a  -"])

        with self.subTest("exit code both ends of pipe ok"):
            output=nodea.run(["true"], pipe=True)
            nodeb.run(["true"], input=output)

        with self.subTest("error on pipe input side"):
            # Exit codes of BOTH sides of the pipe are checked, so a failure
            # on the producing side must also raise.
            with self.assertRaises(subprocess.CalledProcessError):
                output=nodea.run(["false"], pipe=True)
                nodeb.run(["true"], input=output)

        with self.subTest("error on pipe output side "):
            with self.assertRaises(subprocess.CalledProcessError):
                output=nodea.run(["true"], pipe=True)
                nodeb.run(["false"], input=output)

        with self.subTest("error on both sides of pipe"):
            with self.assertRaises(subprocess.CalledProcessError):
                output=nodea.run(["false"], pipe=True)
                nodeb.run(["false"], input=output)

        with self.subTest("check stderr on pipe output side"):
            output=nodea.run(["true"], pipe=True)
            (stdout, stderr)=nodeb.run(["ls", "nonexistingfile"], input=output, return_stderr=True, valid_exitcodes=[0,2])
            self.assertEqual(stdout,[])
            self.assertRegex(stderr[0], "nonexistingfile" )

        with self.subTest("check stderr on pipe input side (should be only printed)"):
            # return_stderr only captures stderr of THIS side of the pipe; the
            # producing side's stderr is shown, not returned.
            output=nodea.run(["ls", "nonexistingfile"], pipe=True)
            (stdout, stderr)=nodeb.run(["true"], input=output, return_stderr=True, valid_exitcodes=[0,2])
            self.assertEqual(stdout,[])
            self.assertEqual(stderr,[] )

    def test_pipe_local_local(self):
        nodea=ExecuteNode(debug_output=True)
        nodeb=ExecuteNode(debug_output=True)
        self.pipe(nodea, nodeb)

    def test_pipe_remote_remote(self):
        nodea=ExecuteNode(ssh_to="localhost", debug_output=True)
        nodeb=ExecuteNode(ssh_to="localhost", debug_output=True)
        self.pipe(nodea, nodeb)

    def test_pipe_local_remote(self):
        nodea=ExecuteNode(debug_output=True)
        nodeb=ExecuteNode(ssh_to="localhost", debug_output=True)
        self.pipe(nodea, nodeb)

    def test_pipe_remote_local(self):
        nodea=ExecuteNode(ssh_to="localhost", debug_output=True)
        nodeb=ExecuteNode(debug_output=True)
        self.pipe(nodea, nodeb)
# Allow running this test module directly, outside the test runner.
if __name__ == '__main__':
    unittest.main()

154
test_thinner.py Normal file
View File

@ -0,0 +1,154 @@
#default test stuff
import unittest
from bin.zfs_autobackup import *
#test specific
import random
import sys
import time
import pprint
class Thing:
def __init__(self, timestamp):
self.timestamp=timestamp
def __str__(self):
# age=now-self.timestamp
struct=time.localtime(self.timestamp)
return("{}".format(time.strftime("%Y-%m-%d %H:%M:%S",struct)))
class TestThinner(unittest.TestCase):
    """Deterministic regression tests for the Thinner retention logic.

    NOTE(review): expected strings come from Thing.__str__, which uses
    time.localtime — results depend on the timezone the test runs in.
    TODO: confirm the required TZ (baseline appears generated at UTC+2).
    """

    def setUp(self):
        return super().setUp()

    def test_incremental(self):
        """Thin after every addition, the way zfs-autobackup runs in practice."""

        # Regression baseline: survivors after 5000 seeded random steps.
        ok=['2023-01-01 11:09:50',
            '2024-01-01 21:06:35',
            '2025-01-01 10:59:44',
            '2026-01-01 19:06:41',
            '2026-03-08 03:27:07',
            '2026-04-07 04:29:04',
            '2026-05-07 20:39:31',
            '2026-06-06 08:06:14',
            '2026-07-06 05:53:12',
            '2026-08-05 08:23:43',
            '2026-09-04 23:13:46',
            '2026-10-04 02:50:48',
            '2026-11-03 02:52:55',
            '2026-12-03 16:04:25',
            '2027-01-01 10:02:16',
            '2027-01-02 10:59:16',
            '2027-01-28 10:54:49',
            '2027-02-01 09:59:47',
            '2027-02-04 04:24:33',
            '2027-02-11 02:51:49',
            '2027-02-18 05:09:25',
            '2027-02-19 15:21:39',
            '2027-02-20 14:41:38',
            '2027-02-21 08:33:50',
            '2027-02-22 08:39:18',
            '2027-02-23 08:52:18',
            '2027-02-24 03:16:31',
            '2027-02-24 03:17:08',
            '2027-02-24 06:26:13',
            '2027-02-24 13:56:41']

        #some arbitrary date
        now=1589229252

        #we want deterministic results
        random.seed(1337)

        thinner=Thinner("5,10s1min,1d1w,1w1m,1m12m,1y5y")
        things=[]

        #thin incrementally while adding
        for i in range(0,5000):
            #increase random amount of time and maybe add a thing
            now=now+random.randint(0,3600*24)
            # NOTE(review): random.random()>=0 is always true, so a thing is
            # added EVERY iteration; the seeded baseline depends on this draw,
            # so don't "fix" the condition without regenerating the baseline.
            if random.random()>=0:
                things.append(Thing(now))

            (keeps, removes)=thinner.thin(things, now=now)
            things=keeps

        result=[]
        for thing in things:
            result.append(str(thing))

        print("Thinner result:")
        pprint.pprint(result)

        self.assertEqual(result, ok)

    def test_full(self):
        """Build the whole list first, then thin once at the end."""

        # Regression baseline for a single final thinning pass.
        ok=['2022-02-24 16:54:37',
            '2023-01-01 11:09:50',
            '2024-01-01 21:06:35',
            '2025-01-01 10:59:44',
            '2026-01-01 19:06:41',
            '2026-03-02 00:23:58',
            '2026-03-08 03:27:07',
            '2026-04-07 04:29:04',
            '2026-05-07 20:39:31',
            '2026-06-06 08:06:14',
            '2026-07-06 05:53:12',
            '2026-08-05 08:23:43',
            '2026-09-04 23:13:46',
            '2026-10-04 02:50:48',
            '2026-11-03 02:52:55',
            '2026-12-03 16:04:25',
            '2027-01-01 10:02:16',
            '2027-01-02 10:59:16',
            '2027-01-25 21:00:35',
            '2027-01-28 10:54:49',
            '2027-02-01 09:59:47',
            '2027-02-04 04:24:33',
            '2027-02-11 02:51:49',
            '2027-02-18 05:09:25',
            '2027-02-19 15:21:39',
            '2027-02-20 14:41:38',
            '2027-02-21 08:33:50',
            '2027-02-22 08:39:18',
            '2027-02-23 08:52:18',
            '2027-02-24 03:16:31',
            '2027-02-24 03:17:08',
            '2027-02-24 06:26:13',
            '2027-02-24 13:56:41']

        #some arbitrary date
        now=1589229252

        #we want deterministic results
        random.seed(1337)

        thinner=Thinner("5,10s1min,1d1w,1w1m,1m12m,1y5y")
        things=[]

        for i in range(0,5000):
            #increase random amount of time and maybe add a thing
            now=now+random.randint(0,3600*24)
            # NOTE(review): always-true condition, same caveat as in
            # test_incremental — a thing is added every iteration.
            if random.random()>=0:
                things.append(Thing(now))

        (things, removes)=thinner.thin(things, now=now)

        result=[]
        for thing in things:
            result.append(str(thing))

        print("Thinner result:")
        pprint.pprint(result)

        self.assertEqual(result, ok)
# Allow running this test module directly, outside the test runner.
if __name__ == '__main__':
    unittest.main()

63
test_zfsnode.py Normal file
View File

@ -0,0 +1,63 @@
#default test stuff
import unittest
from bin.zfs_autobackup import *
import subprocess
import time
from pprint import pformat
class TestZfsNode(unittest.TestCase):
    """Integration tests for ZfsNode against throwaway zfs pools on ram disks.

    NOTE(review): requires root, the brd kernel module, and zfs tooling;
    destroys/creates pools named test_source1/test_source2/test_target1.
    """

    def setUp(self):
        print("Preparing zfs filesystems...")

        #need ram blockdevice
        # subprocess.call("rmmod brd", shell=True)
        subprocess.check_call("modprobe brd rd_size=512000", shell=True)

        #remove old stuff (plain call: ignore failures when pools don't exist yet)
        subprocess.call("zpool destroy test_source1", shell=True)
        subprocess.call("zpool destroy test_source2", shell=True)
        subprocess.call("zpool destroy test_target1", shell=True)

        #create pools, one per ram blockdevice
        subprocess.check_call("zpool create test_source1 /dev/ram0", shell=True)
        subprocess.check_call("zpool create test_source2 /dev/ram1", shell=True)
        subprocess.check_call("zpool create test_target1 /dev/ram2", shell=True)

        #create test structure
        subprocess.check_call("zfs create -p test_source1/fs1/sub", shell=True)
        subprocess.check_call("zfs create -p test_source2/fs2/sub", shell=True)
        subprocess.check_call("zfs create -p test_source2/fs3/sub", shell=True)
        # Select fs1 (and children) fully; fs2 with value 'child' selects only
        # its children; fs3 is left unselected.
        subprocess.check_call("zfs set autobackup:test=true test_source1/fs1", shell=True)
        subprocess.check_call("zfs set autobackup:test=child test_source2/fs2", shell=True)

        print("Prepare done")

        return super().setUp()

    def test_getselected(self):
        logger=Logger()
        description="[Source]"
        node=ZfsNode("test", logger, description=description)
        s=pformat(node.selected_datasets)
        print(s)

        #basics
        # NOTE(review): continuation lines carry pformat's one-space alignment.
        self.assertEqual (s, """[(local): test_source1/fs1,
 (local): test_source1/fs1/sub,
 (local): test_source2/fs2/sub]""")

        #caching, so expect same result
        # NOTE(review): this re-checks the SAME string `s` without recomputing
        # node.selected_datasets after the zfs set, so the caching behavior is
        # not actually exercised here — consider re-evaluating pformat(...).
        subprocess.check_call("zfs set autobackup:test=true test_source2/fs3", shell=True)
        self.assertEqual (s, """[(local): test_source1/fs1,
 (local): test_source1/fs1/sub,
 (local): test_source2/fs2/sub]""")
# Allow running this test module directly, outside the test runner.
if __name__ == '__main__':
    unittest.main()