From: Philipp Gesang Date: Fri, 28 Oct 2016 15:02:31 +0000 (+0200) Subject: add unlink-before-extract behavior for tarfile X-Git-Tag: v2.2~4^2~2 X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=c650acfaa0062530ed59219d5babba40021d3bdf;p=python-delta-tar add unlink-before-extract behavior for tarfile Implement optional removal of existing files analogous to the -U option of GNU tar and bsdtar. This is an effective measure against symlink attacks which tarfile.py is not capable of mitigating. Signed-off-by: Philipp Gesang --- diff --git a/deltatar/deltatar.py b/deltatar/deltatar.py index 460d274..71bc904 100644 --- a/deltatar/deltatar.py +++ b/deltatar/deltatar.py @@ -1348,7 +1348,7 @@ class DeltaTar(object): return False filter = partial(filter, self, list_func) - tarobj.extractall(filter=filter) + tarobj.extractall(filter=filter, unlink=True) tarobj.close() def restore_backup(self, target_path, backup_indexes_paths=[], @@ -2014,7 +2014,8 @@ class RestoreHelper(object): self._deltatar.logger.warning("Ignoring symlink %s" % member.name) # finally, restore the file - index_data['tarobj'].extract(member, symlink_cb=ignore_symlink) + index_data['tarobj'].extract(member, symlink_cb=ignore_symlink, + unlink=True) def add_member_dir(self, member): ''' diff --git a/deltatar/tarfile.py b/deltatar/tarfile.py index b4b784a..325feb8 100644 --- a/deltatar/tarfile.py +++ b/deltatar/tarfile.py @@ -2841,7 +2841,7 @@ class TarFile(object): self.closed = True raise - def extractall(self, path=".", members=None, filter=None): + def extractall(self, path=".", members=None, filter=None, unlink=False): """Extract all members from the archive to the current working directory and set owner, modification time and permissions on directories afterwards. `path' specifies a different directory @@ -2866,7 +2866,7 @@ class TarFile(object): tarinfo = copy.copy(tarinfo) tarinfo.mode = 0o0700 # Do not set_attrs directories, as we will do that further down - self.extract(tarinfo, path, set_attrs=not tarinfo.isdir()) + self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(), unlink=unlink) # Reverse sort directories. directories.sort(key=lambda a: a.name) @@ -2885,7 +2885,8 @@ class TarFile(object): else: self._dbg(1, "tarfile: %s" % e) - def extract(self, member, path="", set_attrs=True, symlink_cb=None): + def extract(self, member, path="", set_attrs=True, symlink_cb=None, + unlink=False): """Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. `member' may be a filename or a TarInfo object. You can @@ -2913,7 +2914,7 @@ class TarFile(object): try: self._extract_member(tarinfo, os.path.join(path, tarinfo.name), - set_attrs=set_attrs) + set_attrs=set_attrs, unlink=unlink) except EnvironmentError as e: if self.errorlevel > 0: raise @@ -2961,7 +2962,7 @@ class TarFile(object): # blkdev, etc.), return None instead of a file object. return None - def _extract_member(self, tarinfo, targetpath, set_attrs=True): + def _extract_member(self, tarinfo, targetpath, set_attrs=True, unlink=False): """Extract the TarInfo object tarinfo to a physical file called targetpath. """ @@ -2983,6 +2984,9 @@ class TarFile(object): else: self._dbg(1, tarinfo.name) + if unlink is True: + _unlinkfirst(targetpath) + if tarinfo.isreg(): self.makefile(tarinfo, targetpath) elif tarinfo.isdir(): @@ -3295,6 +3299,15 @@ class TarFile(object): if not self._extfileobj: self.fileobj.close() self.closed = True + +def _unlinkfirst(targetpath): + try: + os.unlink(targetpath) + except OSError as e: + if e.errno == errno.ENOENT or e.errno == errno.EISDIR: + pass + + # class TarFile class TarIter: