implement delayed symlink creation
author Philipp Gesang <philipp.gesang@intra2net.com>
Thu, 3 Nov 2016 11:02:15 +0000 (12:02 +0100)
committer Philipp Gesang <philipp.gesang@intra2net.com>
Thu, 3 Nov 2016 13:04:21 +0000 (14:04 +0100)
Introduce a hook in ``extract()`` to invoke a callback if a
symlink is encountered in the archive. The implementation is
modeled after GNU Tar.

This is a second (v2) attempt at the symlink extraction problem. The
first version simply ``unlink(2)``ed all files before extraction,
which is a less efficient, albeit more robust, strategy.

deltatar/deltatar.py
deltatar/tarfile.py

index 508048e..c1000fd 100644 (file)
@@ -1319,6 +1319,7 @@ class DeltaTar(object):
                 helper.delete(upath)
 
         helper.restore_directories_permissions()
+        helper.apply_delayed_links()
         index_it.release()
         os.chdir(cwd)
         helper.cleanup()
@@ -1337,6 +1338,10 @@ class DeltaTar(object):
         return j, l_no
 
 
+RECOVER_OK = 0
+RECOVER_NO = 1
+RECOVER_INTERDIR_MADE = 2
+
 class RestoreHelper(object):
     '''
     Class used to help to restore files from indices
@@ -1353,6 +1358,9 @@ class RestoreHelper(object):
     # tarfile.extractall for details.
     _directories = []
 
+    # collected symlinks to be restored at a later instant
+    _delayed_symlinks= []
+
     def __init__(self, deltatar, cwd, index_list=[], backup_path=False,
                  tarobj=None):
         '''
@@ -1433,6 +1441,11 @@ class RestoreHelper(object):
                 data['tarobj'].close()
                 data['tarobj'] = None
 
+    def apply_delayed_links(self):
+        '''
+        Extract the symlinks whose creation was postponed during restore.
+
+        Every queued entry is a ``(member, path, set_attrs)`` tuple; each one
+        is passed to ``extract()`` of the tar object stored in the first
+        element of ``self._data``. The queue is emptied afterwards so that a
+        repeated call does not extract the same links a second time.
+        '''
+        data = self._data[0]
+        for member, path, set_attrs in self._delayed_symlinks:
+            data["tarobj"].extract(member, path, set_attrs=set_attrs)
+        # Clear in place: the list is declared at class level, so rebinding
+        # it on the instance would leave the original list populated.
+        del self._delayed_symlinks[:]
+
     def delete(self, path):
         '''
         Delete a file
@@ -1675,8 +1688,26 @@ class RestoreHelper(object):
             # file might fail when trying to extract a multivolume member
             index_data['tarobj'].volume_number = index_data['curr_vol_no']
 
+        def create_placeholder_file (tarinfo, path, set_attrs, recover=RECOVER_OK):
+            '''
+            Symlink callback for ``extract()``: create an empty placeholder
+            file at the location of the symlink and queue the link so it can
+            be created later.
+
+            tarinfo   -- tar member describing the symlink
+            path      -- directory the member name is joined onto
+            set_attrs -- stored unchanged for the delayed extraction
+            recover   -- one of the RECOVER_* constants; restricts each kind
+                         of recovery (unlink, create parent dirs) to a single
+                         attempt
+
+            Raises FileExistsError if the target still exists after it has
+            been unlinked once, and FileNotFoundError if the placeholder
+            cannot be created even after the parent directories were made.
+            '''
+            fullpath = os.path.join(path, tarinfo.name)
+            try:
+                # O_EXCL: only succeed if the placeholder is a new file
+                fd = os.open(fullpath, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0)
+            except FileExistsError: # == EEXIST
+                if recover == RECOVER_NO: # already retried: propagate error
+                    raise
+                os.unlink(fullpath) # remove existing file and retry
+                return create_placeholder_file(tarinfo, path, set_attrs,
+                                               recover=RECOVER_NO)
+            except FileNotFoundError: # == ENOENT
+                # create interdir only once; re-raise instead of falling
+                # through to os.close() with ``fd`` unassigned
+                if recover != RECOVER_OK:
+                    raise
+                # the missing component is a parent of the placeholder, not
+                # necessarily ``path`` itself
+                parent = os.path.dirname(fullpath)
+                if parent:
+                    os.makedirs(parent, exist_ok=True)
+                return create_placeholder_file(tarinfo, path, set_attrs,
+                                               recover=RECOVER_INTERDIR_MADE)
+            os.close(fd)
+            # ``member`` and ``self`` are taken from the enclosing scope
+            return self._delayed_symlinks.append((member, path, set_attrs))
+
         # finally, restore the file
-        index_data['tarobj'].extract(member)
+        index_data['tarobj'].extract(member, symlink_cb=create_placeholder_file)
 
     def add_member_dir(self, member):
         '''
index 220f09f..713423c 100644 (file)
@@ -2637,12 +2637,16 @@ class TarFile(object):
                 else:
                     self._dbg(1, "tarfile: %s" % e)
 
-    def extract(self, member, path="", set_attrs=True):
+    def extract(self, member, path="", set_attrs=True, symlink_cb=None):
         """Extract a member from the archive to the current working directory,
            using its full name. Its file information is extracted as accurately
            as possible. `member' may be a filename or a TarInfo object. You can
            specify a different directory using `path'. File attributes (owner,
            mtime, mode) are set unless `set_attrs' is False.
+           ``symlink_cb`` is a hook accepting a function that is passed the
+           ``member``, ``path``, and ``set_attrs`` arguments if the tarinfo for
+           ``member`` indicates a symlink in which case only the callback
+           passed will be applied, skipping the actual extraction.
         """
         self._check("r")
 
@@ -2655,6 +2659,9 @@ class TarFile(object):
         if tarinfo.islnk():
             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
 
+        if symlink_cb is not None and tarinfo.issym():
+            return symlink_cb(tarinfo, path, set_attrs)
+
         try:
             self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                                  set_attrs=set_attrs)