From de3dff77b85524ac5280d0a9292461d16a30afea Mon Sep 17 00:00:00 2001
From: Edwin Eefting
Date: Sun, 15 Oct 2023 16:16:05 +0200
Subject: [PATCH] wip: plan target thinning on predicted snapshots instead of
 virtual snapshots

---
 tests/test_zfsautobackup32.py   |  2 +-
 zfs_autobackup/ZfsAutobackup.py |  4 +-
 zfs_autobackup/ZfsDataset.py    | 78 +++++++++++++++++++--------------
 zfs_autobackup/ZfsNode.py       | 26 ++++++++++-
 4 files changed, 73 insertions(+), 37 deletions(-)

diff --git a/tests/test_zfsautobackup32.py b/tests/test_zfsautobackup32.py
index 75771ce..068d322 100644
--- a/tests/test_zfsautobackup32.py
+++ b/tests/test_zfsautobackup32.py
@@ -163,9 +163,9 @@ test_target1/test_source2/fs2/sub@test-20101111000001
         with mocktime("20010202000000"):
             self.assertFalse(ZfsAutobackup("test2 --allow-empty".split(" ")).run())
 
-        #will become common snapshot
         with OutputIO() as buf:
             with redirect_stdout(buf):
+                # now do thinning and transfer all at once
                 with mocktime("20010203000000"):
                     self.assertFalse(ZfsAutobackup("--keep-source=1d10d --keep-target=1m10m --allow-empty --verbose --clear-mountpoint --other-snapshots test2 test_target1".split(" ")).run())
 
diff --git a/zfs_autobackup/ZfsAutobackup.py b/zfs_autobackup/ZfsAutobackup.py
index 6b0b2a3..11764db 100644
--- a/zfs_autobackup/ZfsAutobackup.py
+++ b/zfs_autobackup/ZfsAutobackup.py
@@ -161,7 +161,7 @@ class ZfsAutobackup(ZfsAuto):
 
                 try:
                     dataset.debug("Missing on source, thinning")
-                    dataset.thin()
+                    dataset.thin_list()
                 except Exception as e:
                     dataset.error("Error during thinning of missing datasets ({})".format(str(e)))
 
@@ -393,7 +393,7 @@ class ZfsAutobackup(ZfsAuto):
         self.set_title("Thinning source")
 
         for source_dataset in source_datasets:
-            source_dataset.thin_list(skip_holds=True)
+            source_dataset.thin_list(skip_holds=True)
 
     def filter_properties_list(self):
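Context for the rename above: in this naming scheme thin_list() only plans,
while thin() acts on that plan. A minimal standalone sketch of the split, with
a toy "keep the newest three" rule standing in for the real Thinner and its
--keep-* schedules (illustrative only, not the actual implementation):

    def thin_list(snapshots, keeps):
        # plan only: split into (keeps, obsoletes), never destroy anything
        planned_keeps = [s for s in snapshots if s in keeps or s in snapshots[-3:]]
        obsoletes = [s for s in snapshots if s not in planned_keeps]
        return planned_keeps, obsoletes

    def thin(snapshots, keeps):
        # act: destroy whatever thin_list() marked as obsolete
        for snapshot in thin_list(snapshots, keeps)[1]:
            print("would destroy", snapshot)  # the real code calls snapshot.destroy()

    snaps = ["test-2001020{}000000".format(i) for i in range(1, 6)]
    thin(snaps, keeps=[snaps[-1]])  # always keep the newest as potential common snapshot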
diff --git a/zfs_autobackup/ZfsDataset.py b/zfs_autobackup/ZfsDataset.py
index 985afee..a0038d9 100644
--- a/zfs_autobackup/ZfsDataset.py
+++ b/zfs_autobackup/ZfsDataset.py
@@ -22,13 +22,12 @@ class ZfsDataset:
 
     def __init__(self, zfs_node, name, force_exists=None):
         """
         Args:
-            :type zfs_node: ZfsNode.ZfsNode
+            :type zfs_node: ZfsNode
             :type name: str
             :type force_exists: bool
         """
         self.zfs_node = zfs_node
         self.name = name  # full name
-        self._virtual_snapshots = []
         self.invalidate()
         self.force_exists = force_exists
 
@@ -36,6 +35,7 @@ class ZfsDataset:
         return "{}: {}".format(self.zfs_node, self.name)
 
     def __str__(self):
+
         return self.name
 
     def __eq__(self, obj):
@@ -76,7 +76,6 @@ class ZfsDataset:
         """clear caches"""
         CachedProperty.clear(self)
         self.force_exists = None
-        self._virtual_snapshots = []
 
     def split_path(self):
         """return the path elements as an array"""
@@ -365,7 +364,7 @@ class ZfsDataset:
         return True
 
     def is_ours(self):
-        """return true if this snapshot name has format"""
+        """return true if this snapshot name belongs to the current backup_name and matches the snapshot format"""
        try:
            test = self.timestamp
        except ValueError as e:
@@ -418,17 +417,6 @@ class ZfsDataset:
             seconds = time.mktime(dt.timetuple())
             return seconds
 
-    def from_names(self, names, force_exists=None):
-        """convert a list[names] to a list ZfsDatasets for this zfs_node
-
-        Args:
-            :type names: list[str]
-        """
-        ret = []
-        for name in names:
-            ret.append(self.zfs_node.get_dataset(name, force_exists))
-
-        return ret
 
     # def add_virtual_snapshot(self, snapshot):
     #     """pretend a snapshot exists (usefull in test mode)"""
@@ -449,17 +437,13 @@ class ZfsDataset:
         :rtype: ZfsDataset
         """
 
-        #FIXME: dont check for existance. (currenlty needed for _add_virtual_snapshots)
-        if not self.exists:
-            return []
-
         self.debug("Getting snapshots")
 
         cmd = [
             "zfs", "list", "-d", "1", "-r", "-t", "snapshot", "-H", "-o", "name", self.name
         ]
 
-        return self.from_names(self.zfs_node.run(cmd=cmd, readonly=True), force_exists=True)
+        return self.zfs_node.get_datasets(self.zfs_node.run(cmd=cmd, readonly=True), force_exists=True)
 
     @property
     def our_snapshots(self):
@@ -558,7 +542,7 @@ class ZfsDataset:
             "zfs", "list", "-r", "-t", types, "-o", "name", "-H", self.name
         ])
 
-        return self.from_names(names[1:], force_exists=True)
+        return self.zfs_node.get_datasets(names[1:], force_exists=True)
 
     @CachedProperty
     def datasets(self, types="filesystem,volume"):
@@ -574,7 +558,7 @@ class ZfsDataset:
             "zfs", "list", "-r", "-t", types, "-o", "name", "-H", "-d", "1", self.name
         ])
 
-        return self.from_names(names[1:], force_exists=True)
+        return self.zfs_node.get_datasets(names[1:], force_exists=True)
 
     def send_pipe(self, features, prev_snapshot, resume_token, show_progress, raw, send_properties, write_embedded, send_pipes, zfs_compressed):
@@ -851,7 +835,7 @@ class ZfsDataset:
 
         snapshots = [snapshot for snapshot in self.our_snapshots if snapshot not in ignores]
 
-        return self.zfs_node.thin(snapshots, keep_objects=keeps)
+        return self.zfs_node.thin_list(snapshots, keep_objects=keeps)
 
     def thin(self, skip_holds=False):
         """destroys snapshots according to thin_list, except last snapshot
@@ -877,7 +861,7 @@ class ZfsDataset:
             :type target_dataset: ZfsDataset
         """
 
-        if not target_dataset.snapshots:
+        if not target_dataset.exists or not target_dataset.snapshots:
             # target has nothing yet
             return None
         else:
@@ -885,7 +869,7 @@ class ZfsDataset:
             target_snapshot = target_dataset.find_snapshot(source_snapshot)
             if target_snapshot:
                 if guid_check and source_snapshot.properties['guid'] != target_snapshot.properties['guid']:
-                    target_snapshot.warning("Common snapshot has invalid guid, ignoring.")
+                    target_snapshot.warning("Common snapshot has a mismatching GUID on source and target, ignoring.")
                 else:
                     target_snapshot.debug("common snapshot")
                     return source_snapshot
@@ -1039,7 +1023,12 @@ class ZfsDataset:
         return resume_token
 
     def _plan_sync(self, target_dataset, also_other_snapshots, guid_check, raw):
-        """plan where to start syncing and what to sync and what to keep
+        """Determine at which snapshot to start syncing, what to sync and what to keep.
+
+        Instead of pretending that snapshots already exist on the target
+        (virtual snapshots), this predicts the list of snapshots the target
+        will have after the transfer and lets the thinner decide on that list.
+
         Args:
             :rtype: ( ZfsDataset, ZfsDataset, list[ZfsDataset], list[ZfsDataset], list[ZfsDataset], list[ZfsDataset] )
@@ -1047,6 +1036,16 @@ class ZfsDataset:
             :type also_other_snapshots: bool
             :type guid_check: bool
             :type raw: bool
+
+        Returns:
+            tuple: A tuple containing:
+            - ZfsDataset: The common snapshot
+            - ZfsDataset: The start snapshot
+            - list[ZfsDataset]: Our obsolete source snapshots, as they will be after the transfer is done (will be thinned as soon as possible)
+            - list[ZfsDataset]: Our obsolete target snapshots, as they will be after the transfer is done (will be thinned as soon as possible)
+            - list[ZfsDataset]: Target snapshots that should be kept. Determined for the state after the sync, so it also contains snapshots that still have to be transferred.
+            - list[ZfsDataset]: Incompatible target snapshots that are in the way, after the common snapshot (these need to be destroyed to continue)
+
         """
 
         # determine common and start snapshot
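The hunk below implements that prediction. The idea in isolation, with plain
strings standing in for ZfsDataset objects (dataset and snapshot names are
made up for the example):

    existing_target = ["tank/backup/data@test-20010201000000"]
    incompatible = []
    to_send = ["tank/data@test-20010202000000", "tank/data@test-20010203000000"]

    # snapshots the target will have after the transfer: what it already
    # has (minus incompatibles), plus the ones that will be sent over
    possible_target = [s for s in existing_target if s not in incompatible]
    for snap in to_send:
        snapshot_name = snap.split("@", 1)[1]
        possible_target.append("tank/backup/data@" + snapshot_name)

    # the thinner then plans keeps/obsoletes against this future state
    print(possible_target)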
@@ -1055,18 +1054,32 @@ class ZfsDataset:
         start_snapshot = self.find_start_snapshot(common_snapshot, also_other_snapshots)
         incompatible_target_snapshots = target_dataset.find_incompatible_snapshots(common_snapshot, raw)
 
-        # let thinner decide whats obsolete on source
+        # let the thinner decide what's obsolete on source after the transfer is done, keeping the last snapshot as common.
         source_obsoletes = []
         if self.our_snapshots:
             source_obsoletes = self.thin_list(keeps=[self.our_snapshots[-1]])[1]
 
-        # let thinner decide keeps/obsoletes on target, AFTER the transfer would be done (by using virtual snapshots)
-        target_dataset._add_virtual_snapshots(self, start_snapshot, also_other_snapshots)
+
+        # a list of all our possible target snapshots
+        possible_target_snapshots = []
+
+        # start with the snapshots that already exist on the target, minus the incompatible ones
+        if target_dataset.exists:
+            possible_target_snapshots = [snapshot for snapshot in target_dataset.our_snapshots if snapshot not in incompatible_target_snapshots]
+
+        # add our snapshots from the source to the target list
+        source_snapshot = start_snapshot
+        while source_snapshot:
+            target_snapshot = target_dataset.zfs_node.get_dataset(target_dataset.filesystem_name + "@" + source_snapshot.snapshot_name, force_exists=False)
+            if target_snapshot.is_ours():
+                possible_target_snapshots.append(target_snapshot)
+            source_snapshot = self.find_next_snapshot(source_snapshot, False)
+
+        # let the thinner decide what it wants to keep after the transfer is done.
         target_keeps = []
         target_obsoletes = []
-        if target_dataset.our_snapshots:
-            (target_keeps, target_obsoletes) = target_dataset.thin_list(keeps=[target_dataset.our_snapshots[-1]],
-                                                                        ignores=incompatible_target_snapshots)
+        if possible_target_snapshots:
+            (target_keeps, target_obsoletes) = target_dataset.zfs_node.thin_list(possible_target_snapshots, keep_objects=[possible_target_snapshots[-1]])
 
         return common_snapshot, start_snapshot, source_obsoletes, target_obsoletes, target_keeps, incompatible_target_snapshots
 
@@ -1133,6 +1146,7 @@ class ZfsDataset:
 
             # keep data encrypted by sending it raw (including properties)
             raw = True
 
+        # note: during the sync itself only target_obsoletes is used, to check whether the target doesn't want a snapshot
         (common_snapshot, start_snapshot, source_obsoletes, target_obsoletes, target_keeps, incompatible_target_snapshots) = \
             self._plan_sync(target_dataset=target_dataset, also_other_snapshots=also_other_snapshots,
                             guid_check=guid_check, raw=raw)
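How the plan is consumed, per the note above (a simplified sketch that ignores
the holds, resume tokens and progress reporting the real sync code also
handles; the values are stand-ins):

    prev = "common"                      # the common snapshot, or None on an initial sync
    target_obsoletes = {"b"}             # stand-in for snapshots the target does not want
    for snapshot in ["a", "b", "c"]:     # source snapshots after the common one
        if snapshot in target_obsoletes:
            continue                     # not transferred; zfs send -i may skip intermediate snapshots
        print("incremental send {} -> {}".format(prev, snapshot))
        prev = snapshot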
diff --git a/zfs_autobackup/ZfsNode.py b/zfs_autobackup/ZfsNode.py
index 982b20f..7c7df47 100644
--- a/zfs_autobackup/ZfsNode.py
+++ b/zfs_autobackup/ZfsNode.py
@@ -57,7 +57,14 @@ class ZfsNode(ExecuteNode):
 
         ExecuteNode.__init__(self, ssh_config=ssh_config, ssh_to=ssh_to, readonly=readonly, debug_output=debug_output)
 
-    def thin(self, objects, keep_objects):
+    def thin_list(self, objects, keep_objects):
+        """Let the thinner determine which objects to keep and which to remove.
+
+        :type objects: list[ZfsDataset]
+        :type keep_objects: list[ZfsDataset]
+        :return: ( keeps, removes )
+        :rtype: ( list[ZfsDataset], list[ZfsDataset] )
+        """
 
         # NOTE: if thinning is disabled with --no-thinning, self.__thinner will be none.
         if self.__thinner is not None:
@@ -108,10 +115,25 @@ class ZfsNode(ExecuteNode):
         return self.__pools.setdefault(zpool_name, ZfsPool(self, zpool_name))
 
     def get_dataset(self, name, force_exists=None):
-        """get a ZfsDataset() object from name. stores objects internally to enable caching"""
+        """get a ZfsDataset() object from name. Stores objects internally to enable caching.
+        :rtype: ZfsDataset
+        """
 
         return self.__datasets.setdefault(name, ZfsDataset(self, name, force_exists))
 
+    def get_datasets(self, names, force_exists=None):
+        """get a list of ZfsDataset() objects from names. Stores objects internally to enable caching.
+
+        :type names: list[str]
+        :rtype: list[ZfsDataset]
+        """
+
+        ret = []
+        for name in names:
+            ret.append(self.get_dataset(name, force_exists))
+
+        return ret
+
     # def reset_progress(self):
    #     """reset progress output counters"""
    #     self._progress_total_bytes = 0
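A note on the caching pattern used by get_dataset() and get_datasets():
dict.setdefault() returns the already-cached object on repeated lookups, so
every caller asking for the same name shares one ZfsDataset instance, and with
it the cached properties and force_exists state. A standalone illustration
(the class names here are stand-ins, not the real classes):

    class Dataset:
        def __init__(self, name):
            self.name = name

    class Node:
        def __init__(self):
            self.datasets = {}

        def get_dataset(self, name):
            # note: the default is constructed even on a cache hit, then discarded
            return self.datasets.setdefault(name, Dataset(name))

        def get_datasets(self, names):
            return [self.get_dataset(name) for name in names]

    node = Node()
    assert node.get_dataset("tank/data") is node.get_dataset("tank/data")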