From: Philipp Gesang Date: Thu, 3 Nov 2016 11:02:15 +0000 (+0100) Subject: implement delayed symlink creation X-Git-Tag: v2.2~8^2~8 X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=786addd675c68064135aa83f11de58cf37a1b6b2;p=python-delta-tar implement delayed symlink creation Introduce a hook in ``extract()`` to invoke a callback if a symlink is encountered in the archive. The implementation is modeled after GNU Tar. This is a second (v2) attempt at the symlink extraction problem. The first version simply called ``unlink(2)`` on all files before extraction, which is a less efficient albeit more robust strategy. --- diff --git a/deltatar/deltatar.py b/deltatar/deltatar.py index 508048e..c1000fd 100644 --- a/deltatar/deltatar.py +++ b/deltatar/deltatar.py @@ -1319,6 +1319,7 @@ class DeltaTar(object): helper.delete(upath) helper.restore_directories_permissions() + helper.apply_delayed_links() index_it.release() os.chdir(cwd) helper.cleanup() @@ -1337,6 +1338,10 @@ class DeltaTar(object): return j, l_no +RECOVER_OK = 0 +RECOVER_NO = 1 +RECOVER_INTERDIR_MADE = 2 + class RestoreHelper(object): ''' Class used to help to restore files from indices @@ -1353,6 +1358,9 @@ class RestoreHelper(object): # tarfile.extractall for details. 
_directories = [] + # collected symlinks to be restored at a later instant + _delayed_symlinks= [] + def __init__(self, deltatar, cwd, index_list=[], backup_path=False, tarobj=None): ''' @@ -1433,6 +1441,11 @@ class RestoreHelper(object): data['tarobj'].close() data['tarobj'] = None + def apply_delayed_links(self): + data = self._data[0] + for member, path, set_attrs in self._delayed_symlinks: + data["tarobj"].extract(member, path, set_attrs=set_attrs) + def delete(self, path): ''' Delete a file @@ -1675,8 +1688,26 @@ class RestoreHelper(object): # file might fail when trying to extract a multivolume member index_data['tarobj'].volume_number = index_data['curr_vol_no'] + def create_placeholder_file (tarinfo, path, set_attrs, recover=RECOVER_OK): + try: + fullpath = os.path.join(path, tarinfo.name) + fd = os.open(fullpath, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0) + except FileExistsError as exn: # == EEXIST + if recover != RECOVER_NO: # remove existing file and retry + os.unlink(fullpath) + return create_placeholder_file(tarinfo, path, set_attrs, + recover=RECOVER_NO) + raise exn # propagate error otherwise + except FileNotFoundError as exn: # == ENOENT + if recover == RECOVER_OK: # create interdir only once + os.makedirs(path) + return create_placeholder_file(tarinfo, path, set_attrs, + recover=RECOVER_INTERDIR_MADE) + os.close(fd) + return self._delayed_symlinks.append((member, path, set_attrs)) + # finally, restore the file - index_data['tarobj'].extract(member) + index_data['tarobj'].extract(member, symlink_cb=create_placeholder_file) def add_member_dir(self, member): ''' diff --git a/deltatar/tarfile.py b/deltatar/tarfile.py index 220f09f..713423c 100644 --- a/deltatar/tarfile.py +++ b/deltatar/tarfile.py @@ -2637,12 +2637,16 @@ class TarFile(object): else: self._dbg(1, "tarfile: %s" % e) - def extract(self, member, path="", set_attrs=True): + def extract(self, member, path="", set_attrs=True, symlink_cb=None): """Extract a member from the archive to the 
current working directory, using its full name. Its file information is extracted as accurately as possible. `member' may be a filename or a TarInfo object. You can specify a different directory using `path'. File attributes (owner, mtime, mode) are set unless `set_attrs' is False. + ``symlink_cb`` is a hook accepting a function that is passed the + ``member``, ``path``, and ``set_attrs`` arguments if the tarinfo for + ``member`` indicates a symlink in which case only the callback + passed will be applied, skipping the actual extraction. """ self._check("r") @@ -2655,6 +2659,9 @@ class TarFile(object): if tarinfo.islnk(): tarinfo._link_target = os.path.join(path, tarinfo.linkname) + if symlink_cb is not None and tarinfo.issym(): + return symlink_cb(tarinfo, path, set_attrs) + try: self._extract_member(tarinfo, os.path.join(path, tarinfo.name), set_attrs=set_attrs)