ignore all symlinks
authorPhilipp Gesang <philipp.gesang@intra2net.com>
Mon, 7 Nov 2016 09:00:32 +0000 (10:00 +0100)
committerThomas Jarosch <thomas.jarosch@intra2net.com>
Mon, 2 Apr 2018 09:56:01 +0000 (11:56 +0200)
Don’t delay the creation of symlinks but suppress it entirely.

The rationale is that extraction with deltatar will only ever
operate on inputs whose symlinks are dereferenced upon archive
creation. Thus valid archives will not contain symlinks at all.

Also, it would appear that deltatar assumes paths of objects
inside a tarball are unique. If the tarball contains ultiple
objects with the same path, it will extract only the first one it
encounters and ignore the rest. This means that it would take at
least two successive backups to perform a symlink attack, the
first one planting the link and the second writing over the
location. This is prevented by the current mitigation strategy
(and by the --unlink option of other tar utilities).

deltatar/deltatar.py
deltatar/tarfile.py
testing/test_deltatar.py

index f889d3e..a01e3bb 100644 (file)
@@ -1319,7 +1319,6 @@ class DeltaTar(object):
                 helper.delete(upath)
 
         helper.restore_directories_permissions()
-        helper.apply_delayed_links()
         index_it.release()
         os.chdir(cwd)
         helper.cleanup()
@@ -1338,10 +1337,6 @@ class DeltaTar(object):
         return j, l_no
 
 
-RECOVER_OK = 0
-RECOVER_NO = 1
-RECOVER_INTERDIR_MADE = 2
-
 class RestoreHelper(object):
     '''
     Class used to help to restore files from indices
@@ -1358,9 +1353,6 @@ class RestoreHelper(object):
     # tarfile.extractall for details.
     _directories = []
 
-    # collected symlinks to be restored at a later instant
-    _delayed_symlinks= []
-
     def __init__(self, deltatar, cwd, index_list=[], backup_path=False,
                  tarobj=None):
         '''
@@ -1441,29 +1433,6 @@ class RestoreHelper(object):
                 data['tarobj'].close()
                 data['tarobj'] = None
 
-    def apply_delayed_links(self):
-        data = self._data[0]
-        # only restore those links whose placeholder file hasn’t been removed
-        # during subsequent extraction
-        for member, path, set_attrs, st_dev, st_ino in self._delayed_symlinks:
-            fullpath = os.path.join(path, member.name)
-            try:
-                st = os.stat(fullpath)
-            except OSError:
-                self._deltatar.logger.warning \
-                    ("Not restoring symlink %s from tarball: placeholder "
-                     "file was deleted during extraction" % fullpath)
-                continue
-            if st.st_dev != st_dev or st.st_ino != st_ino:
-                self._deltatar.logger.warning \
-                    ("Not restoring symlink %s from tarball: placeholder "
-                     "file was modified during extraction" % fullpath)
-                continue
-            # at this point we’re certain we’re dealing with the placeholder we
-            # created so we can remove it and create the actual symlink
-            os.unlink(fullpath)
-            data["tarobj"].extract(member, path, set_attrs=set_attrs)
-
     def delete(self, path):
         '''
         Delete a file
@@ -1706,32 +1675,11 @@ class RestoreHelper(object):
             # file might fail when trying to extract a multivolume member
             index_data['tarobj'].volume_number = index_data['curr_vol_no']
 
-        def create_placeholder_file (member, path, set_attrs, recover=RECOVER_OK):
-            try:
-                fullpath = os.path.join(path, member.name)
-                fd = os.open(fullpath, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0)
-            except FileExistsError as exn: # == EEXIST
-                if recover != RECOVER_NO: # remove existing file and retry
-                    os.unlink(fullpath)
-                    return create_placeholder_file(member, path, set_attrs,
-                                                   recover=RECOVER_NO)
-                raise exn # propagate error otherwise
-            except FileNotFoundError as exn: # == ENOENT
-                if recover == RECOVER_OK: # create interdir only once
-                    os.makedirs(path)
-                    return create_placeholder_file(member, path, set_attrs,
-                                                   recover=RECOVER_INTERDIR_MADE)
-            st = os.fstat(fd)
-            os.close(fd)
-            return self._delayed_symlinks.append((member, path, set_attrs,
-                                                  # GNU tar also stores
-                                                  # st_birthtim[e] (via gnulib)
-                                                  # which is not available on
-                                                  # Linux
-                                                  st.st_dev, st.st_ino))
+        def ignore_symlink (member, *_args):
+            self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
 
         # finally, restore the file
-        index_data['tarobj'].extract(member, symlink_cb=create_placeholder_file)
+        index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)
 
     def add_member_dir(self, member):
         '''
index 4aef3ae..07c33f8 100644 (file)
@@ -275,28 +275,6 @@ def filemode(mode):
                   DeprecationWarning, 2)
     return stat.filemode(mode)
 
-def contains_dot_dot(name):
-    """Check whether a path contains double dots thus referring to the parent
-       directory.
-    """
-    l = len(name)
-    if l < 2:
-        return False
-    p = 0
-    while True:
-        if name[p] == '.' and name[p + 1] == '.':
-            ppp = p + 2
-            if ppp == l or name[ppp] == '/':
-                return True
-        while name[p] != '/':
-            if p == l - 2:
-                return False
-            p += 1
-        if p == l - 2:
-            break
-        p += 1
-    return False
-
 class TarError(Exception):
     """Base exception."""
     pass
@@ -2668,7 +2646,8 @@ class TarFile(object):
            ``symlink_cb`` is a hook accepting a function that is passed the
            ``member``, ``path``, and ``set_attrs`` arguments if the tarinfo for
            ``member`` indicates a symlink in which case only the callback
-           passed will be applied, skipping the actual extraction.
+           passed will be applied, skipping the actual extraction. In case the
+           callback is invoked, its return value is passed on to the caller.
         """
         self._check("r")
 
@@ -2681,9 +2660,7 @@ class TarFile(object):
         if tarinfo.islnk():
             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
 
-        if symlink_cb is not None and tarinfo.issym() \
-                and (os.path.isabs(tarinfo.linkname)
-                     or contains_dot_dot(tarinfo.linkname)):
+        if symlink_cb is not None and tarinfo.issym():
             return symlink_cb(member, path, set_attrs)
 
         try:
index 2c3ceea..77c7195 100644 (file)
@@ -36,9 +36,6 @@ import filesplit
 from . import BaseTest
 from . import new_volume_handler
 
-SYMLINK_GOOD = 0
-SYMLINK_BAD = 1
-
 class DeltaTarTest(BaseTest):
     """
     Test backups
@@ -1441,8 +1438,8 @@ class DeltaTarTest(BaseTest):
 
     def test_restore_with_symlinks(self):
         '''
-        Creates a full backup containing different varieties of symlinks. The
-        malicious ones must be filtered out.
+        Creates a full backup containing different varieties of symlinks. All
+        of them must be filtered out.
         '''
 
         deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD,
@@ -1460,39 +1457,26 @@ class DeltaTarTest(BaseTest):
 
         # add symlinks to existing archive
 
-        def add_symlink (a, kind, name, dst):
+        def add_symlink (a, name, dst):
             l = tarfile.TarInfo("snapshot://%s" % name)
             l.type = tarfile.SYMTYPE
             l.linkname = dst
             a.addfile(l)
-            return (kind, name, dst)
+            return name
 
         with tarfile.open(tar_path,mode="w") as a:
             checkme = \
-                [ add_symlink(a, SYMLINK_GOOD,
-                              "symlinks/foo", "internal-file")
-                , add_symlink(a, SYMLINK_BAD,
-                              "symlinks/bar", "/absolute/path")
-                , add_symlink(a, SYMLINK_BAD,
-                              "symlinks/baz", "../parent/../../paths") ]
+                [ add_symlink(a, "symlinks/foo", "internal-file")
+                , add_symlink(a, "symlinks/bar", "/absolute/path")
+                , add_symlink(a, "symlinks/baz", "../parent/../../paths") ]
 
         deltatar.restore_backup(target_path="source_dir",
                                 backup_tar_path=tar_path)
 
         # check what happened to our symlinks
-        for kind, source, dest in checkme:
-            resolve = kind == SYMLINK_GOOD
-            fullpath = os.path.join("source_dir", source)
-            assert os.path.islink(fullpath)
-            if resolve is True:
-                try:
-                    linkname = os.readlink(fullpath)
-                    assert dest == linkname
-                except OSError as exn:
-                    if exn.errno == errno.EINVAL:
-                        raise Exception("Extracted file “%s” [%s] is not"
-                                        " a valid symlink!"
-                                        % (str(source), str(os.stat(fullpath))))
+        for name in checkme:
+            fullpath = os.path.join("source_dir", name)
+            assert not os.path.exists(fullpath)
 
     def test_restore_malicious_symlinks(self):
         '''
@@ -1526,33 +1510,24 @@ class DeltaTarTest(BaseTest):
             f = tarfile.TarInfo("snapshot://%s" % name)
             f.type = tarfile.REGTYPE
             a.addfile(f)
-            return a
 
         testpath = "symlinks/pernicious-link"
-        testdst = "symlinks/pernicious-link"
+        testdst = "/tmp/does/not/exist"
+
         with tarfile.open(tar_path,mode="w") as a:
             add_symlink(a, testpath, testdst)
+            add_symlink(a, testpath, testdst+"X")
+            add_symlink(a, testpath, testdst+"XXX")
             add_file(a, testpath)
 
-        dstinfo = \
-            { "exists": os.path.exists(testdst)
-            , "chksum": os.path.isfile(testdst) and self.md5sum(testdst) or None }
-
         deltatar.restore_backup(target_path="source_dir",
                                 backup_tar_path=tar_path)
 
-        # check whether we got the link or the file
+        # check whether the link was extracted; deltatar seems to only ever
+        # retrieve the first item it finds for a given path which in the case
+        # at hand is a symlink to some non-existent path
         fullpath = os.path.join("source_dir", testpath)
-        # since tarfile unconditionally opens files with the flag combo
-        # “O_WRONLY | O_CREAT | O_TRUNC” (like GNU tar without the -k flag) we
-        # expect the link to be present, not the file from the archive; also,
-        #the link target may not contain the contents of the dummy file
-        assert os.path.islink(fullpath)
-        dst = os.readlink(fullpath)
-        assert os.path.exists(dst) == dstinfo["exists"]
-        chksum = dstinfo.get("chksum")
-        if chksum is not None:
-            assert self.md5sum(dst) == chksum
+        assert not os.path.exists(fullpath)
 
 class DeltaTar2Test(DeltaTarTest):
     '''