From: Eduardo Robles Elvira
Date: Fri, 5 Jul 2013 09:50:43 +0000 (+0200)
Subject: extracting files in r#gz mode now works too, includes unit tests
X-Git-Tag: v2.2~174
X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=85737f48c38a432f2429e9e3e4b81fed164c4b9a;p=python-delta-tar

extracting files in r#gz mode now works too, includes unit tests
---

diff --git a/deltatar/tarfile.py b/deltatar/tarfile.py
index e7fc6da..a6fb2f6 100644
--- a/deltatar/tarfile.py
+++ b/deltatar/tarfile.py
@@ -397,7 +397,7 @@ class _Stream:
        _Stream is intended to be used only internally.
     """
 
-    def __init__(self, name, mode, comptype, fileobj, bufsize):
+    def __init__(self, name, mode, comptype, fileobj, bufsize, concat_stream=False):
         """Construct a _Stream object.
         """
         self._extfileobj = True
@@ -420,6 +420,9 @@ class _Stream:
         self.pos = 0L
         self.concat_pos = 0L
         self.closed = False
+        self.flags = 0L
+        self.internal_pos = 0L
+        self.concat_stream = concat_stream
 
         if comptype == "gz":
             try:
@@ -469,7 +472,6 @@ class _Stream:
         '''
         if self.mode != "w":
             raise CompressionError("new compression blocks can only be added in mode 'w'")
-
         if self.comptype == "gz":
             self._new_gz_block()
         else:
@@ -537,6 +539,10 @@ class _Stream:
         if close_fileobj and not self._extfileobj:
             self.fileobj.close()
 
+        # read the zlib crc
+        if not close_fileobj and self.mode == "r":
+            self.__read(8)
+
         self.closed = True
 
     def _init_read_gz(self):
@@ -546,12 +552,15 @@ class _Stream:
         self.dbuf = ""
 
         # taken from gzip.GzipFile with some alterations
-        if self.__read(2) != "\037\213":
+        read2 = self.__read(2)
+        if read2 != "\037\213":
             raise ReadError("not a gzip file")
-        if self.__read(1) != "\010":
+
+        read1 = self.__read(1)
+        if read1 != "\010":
             raise CompressionError("unsupported compression method")
 
-        flag = ord(self.__read(1))
+        self.flags = flag = ord(self.__read(1))
         self.__read(6)
 
         if flag & 4:
@@ -622,6 +631,16 @@ class _Stream:
                 buf = self.cmp.decompress(buf)
             except IOError:
                 raise ReadError("invalid compressed data")
+
+            if self.concat_stream and len(self.cmp.unused_data) != 0:
+                self.buf = self.cmp.unused_data + self.buf
+                self.close(close_fileobj=False)
+                try:
+                    self._init_read_gz()
+                    self.closed = False
+                except:
+                    # happens at the end of the file
+                    pass
             t.append(buf)
             c += len(buf)
         t = "".join(t)
@@ -640,6 +659,7 @@ class _Stream:
                 break
             t.append(buf)
             c += len(buf)
+            self.internal_pos += len(buf)
         t = "".join(t)
         self.buf = t[size:]
         return t[:size]
@@ -1581,7 +1601,8 @@ class TarFile(object):
     def __init__(self, name=None, mode="r", fileobj=None, format=None,
             tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
             errors=None, pax_headers=None, debug=None, errorlevel=None,
-            max_volume_size=None, new_volume_handler=None, concat_compression=False):
+            max_volume_size=None, new_volume_handler=None,
+            concat_compression=False):
         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
            read from an existing archive, 'a' to append data to an existing
           file or 'w' to create a new file overwriting an existing one. `mode'
@@ -1734,6 +1755,9 @@ class TarFile(object):
            'w|'         open an uncompressed stream for writing
            'w|gz'       open a gzip compressed stream for writing
            'w|bz2'      open a bzip2 compressed stream for writing
+
+           'r#gz'       open a stream of gzip compressed tar blocks for reading
+           'w#gz'       open a stream of gzip compressed tar blocks for writing
         """
 
         if not name and not fileobj:
@@ -1794,7 +1818,7 @@ class TarFile(object):
             kwargs['concat_compression'] = True
 
             t = cls(name, filemode,
-                    _Stream(name, filemode, comptype, fileobj, bufsize),
+                    _Stream(name, filemode, comptype, fileobj, bufsize, concat_stream=True),
                     **kwargs)
             t._extfileobj = False
             return t
diff --git a/testing/test_concat_compress.py b/testing/test_concat_compress.py
index f125aa2..dbe7806 100644
--- a/testing/test_concat_compress.py
+++ b/testing/test_concat_compress.py
@@ -38,7 +38,7 @@ class ConcatCompressTest(unittest.TestCase):
                 md5.update(chunk)
         return md5.hexdigest()
 
-    def test_zip_compress_concat(self):
+    def test_zcat_extract_concat(self):
         """
         Create a tar file with only one file inside, using concat compression
         mode. Then decompress it with zcat and untar it with gnu tar.
@@ -61,3 +61,102 @@ class ConcatCompressTest(unittest.TestCase):
         os.system("tar xf sample.tar")
         assert os.path.exists("big")
         assert hash == self.md5sum("big")
+
+    def test_concat_extract(self):
+        '''
+        Create a tar file with only one file inside, using concat compression
+        mode, then decompress it with tarlib module too.
+        '''
+
+        # create the content of the file to compress and hash it
+        hash = self.create_file("big", 50000)
+
+        # create the tar file with volumes
+        tarobj = TarFile.open("sample.tar.gz",
+                              mode="w#gz",
+                              concat_compression=True)
+        tarobj.add("big")
+        tarobj.close()
+        os.unlink("big")
+
+        tarobj = TarFile.open("sample.tar.gz",
+                              mode="r#gz")
+        tarobj.extractall()
+        tarobj.close()
+        assert os.path.exists("big")
+        assert hash == self.md5sum("big")
+
+    def test_multiple_files_zcat_extract(self):
+        '''
+        Create a tar file with only multiple files inside, using concat
+        compression mode, then decompress the tarfile.
+        '''
+
+        # create sample data
+        hash = dict()
+        hash["big"] = self.create_file("big", 50000)
+        hash["small"] = self.create_file("small", 100)
+        hash["small2"] = self.create_file("small2", 354)
+
+        # create the tar file with volumes
+        tarobj = TarFile.open("sample.tar.gz",
+                              mode="w#gz",
+                              concat_compression=True)
+        tarobj.add("big")
+        tarobj.add("small")
+        tarobj.add("small2")
+        tarobj.close()
+
+        assert os.path.exists("sample.tar.gz")
+
+        os.unlink("big")
+        os.unlink("small")
+        os.unlink("small2")
+
+        # extract and check output
+        os.system("zcat sample.tar.gz > sample.tar")
+        tarobj = TarFile.open("sample.tar",
+                              mode="r")
+        tarobj.extractall()
+        tarobj.close()
+
+        for key, value in hash.iteritems():
+            assert os.path.exists(key)
+            assert value == self.md5sum(key)
+
+    def test_multiple_files_concat_extract(self):
+        '''
+        Create a tar file with only multiple files inside, using concat
+        compression mode, then decompress the tarfile.
+        '''
+
+        # create sample data
+        hash = dict()
+        hash["big"] = self.create_file("big", 50000)
+        hash["small"] = self.create_file("small", 100)
+        hash["small2"] = self.create_file("small2", 354)
+
+        # create the tar file with volumes
+        tarobj = TarFile.open("sample.tar.gz",
+                              mode="w#gz",
+                              concat_compression=True)
+        tarobj.add("big")
+        tarobj.add("small")
+        tarobj.add("small2")
+        tarobj.close()
+
+        assert os.path.exists("sample.tar.gz")
+
+        os.unlink("big")
+        os.unlink("small")
+        os.unlink("small2")
+
+        # extract and check output
+        tarobj = TarFile.open("sample.tar.gz",
+                              mode="r#gz")
+        tarobj.extractall()
+        tarobj.close()
+
+        for key, value in hash.iteritems():
+            assert os.path.exists(key)
+            assert value == self.md5sum(key)
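
Note (not part of the patch): the write/read round trip that the new tests exercise can be
summarized with the short sketch below. It is a minimal illustration only; it assumes
deltatar.tarfile.TarFile from this repository is importable and the file names
("sample.tar.gz", "big") are made up for the example.

    from deltatar.tarfile import TarFile

    # write an archive in concat-compression mode ('w#gz'), which is intended
    # to start a separate gzip block for each added member
    tarobj = TarFile.open("sample.tar.gz", mode="w#gz", concat_compression=True)
    tarobj.add("big")
    tarobj.close()

    # read it back with the newly supported 'r#gz' stream mode
    tarobj = TarFile.open("sample.tar.gz", mode="r#gz")
    tarobj.extractall()
    tarobj.close()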