_Stream is intended to be used only internally.
"""
- def __init__(self, name, mode, comptype, fileobj, bufsize):
+ def __init__(self, name, mode, comptype, fileobj, bufsize, concat_stream=False):
"""Construct a _Stream object.
"""
self._extfileobj = True
self.pos = 0L
self.concat_pos = 0L
self.closed = False
+ self.flags = 0L
+ self.internal_pos = 0L
+ self.concat_stream = concat_stream
if comptype == "gz":
try:
'''
if self.mode != "w":
raise CompressionError("new compression blocks can only be added in mode 'w'")
-
if self.comptype == "gz":
self._new_gz_block()
else:
if close_fileobj and not self._extfileobj:
self.fileobj.close()
+ # read the zlib crc
+ if not close_fileobj and self.mode == "r":
+ self.__read(8)
+
self.closed = True
def _init_read_gz(self):
self.dbuf = ""
# taken from gzip.GzipFile with some alterations
- if self.__read(2) != "\037\213":
+ read2 = self.__read(2)
+ if read2 != "\037\213":
raise ReadError("not a gzip file")
- if self.__read(1) != "\010":
+
+ read1 = self.__read(1)
+ if read1 != "\010":
raise CompressionError("unsupported compression method")
- flag = ord(self.__read(1))
+ self.flags = flag = ord(self.__read(1))
self.__read(6)
if flag & 4:
buf = self.cmp.decompress(buf)
except IOError:
raise ReadError("invalid compressed data")
+
+ if self.concat_stream and len(self.cmp.unused_data) != 0:
+ self.buf = self.cmp.unused_data + self.buf
+ self.close(close_fileobj=False)
+ try:
+ self._init_read_gz()
+ self.closed = False
+ except:
+ # happens at the end of the file
+ pass
t.append(buf)
c += len(buf)
t = "".join(t)
break
t.append(buf)
c += len(buf)
+ self.internal_pos += len(buf)
t = "".join(t)
self.buf = t[size:]
return t[:size]
def __init__(self, name=None, mode="r", fileobj=None, format=None,
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
errors=None, pax_headers=None, debug=None, errorlevel=None,
- max_volume_size=None, new_volume_handler=None, concat_compression=False):
+ max_volume_size=None, new_volume_handler=None,
+ concat_compression=False):
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
read from an existing archive, 'a' to append data to an existing
file or 'w' to create a new file overwriting an existing one. `mode'
'w|' open an uncompressed stream for writing
'w|gz' open a gzip compressed stream for writing
'w|bz2' open a bzip2 compressed stream for writing
+
+ 'r#gz' open a stream of gzip compressed tar blocks for reading
+ 'w#gz' open a stream of gzip compressed tar blocks for writing
"""
if not name and not fileobj:
kwargs['concat_compression'] = True
t = cls(name, filemode,
- _Stream(name, filemode, comptype, fileobj, bufsize),
+ _Stream(name, filemode, comptype, fileobj, bufsize, concat_stream=True),
**kwargs)
t._extfileobj = False
return t
md5.update(chunk)
return md5.hexdigest()
- def test_zip_compress_concat(self):
+ def test_zcat_extract_concat(self):
"""
Create a tar file with only one file inside, using concat compression
mode. Then decompress it with zcat and untar it with gnu tar.
os.system("tar xf sample.tar")
assert os.path.exists("big")
assert hash == self.md5sum("big")
+
+    def test_concat_extract(self):
+        """
+        Create a tar file with a single file inside, using concat
+        compression mode, then decompress and extract it with this
+        tarfile module itself (mode "r#gz") rather than external zcat.
+        """
+
+        # create the content of the file to compress and hash it
+        hash = self.create_file("big", 50000)
+
+        # create the tar file with concat compression enabled, so the
+        # added file gets its own gzip stream inside the archive
+        tarobj = TarFile.open("sample.tar.gz",
+                              mode="w#gz",
+                              concat_compression=True)
+        tarobj.add("big")
+        tarobj.close()
+        # remove the original so the extraction below really recreates it
+        os.unlink("big")
+
+        # reopen in concat-read mode and extract everything
+        tarobj = TarFile.open("sample.tar.gz",
+                              mode="r#gz")
+        tarobj.extractall()
+        tarobj.close()
+        # the extracted file must exist and match the original md5
+        assert os.path.exists("big")
+        assert hash == self.md5sum("big")
+
+    def test_multiple_files_zcat_extract(self):
+        """
+        Create a tar file with multiple files inside, using concat
+        compression mode, then decompress it with zcat and extract the
+        resulting plain tar with this module in ordinary "r" mode.
+        """
+
+        # create sample files of different sizes and remember their md5s
+        hash = dict()
+        hash["big"] = self.create_file("big", 50000)
+        hash["small"] = self.create_file("small", 100)
+        hash["small2"] = self.create_file("small2", 354)
+
+        # create the tar file with concat compression: each added file
+        # becomes a separate gzip stream concatenated in sample.tar.gz
+        tarobj = TarFile.open("sample.tar.gz",
+                              mode="w#gz",
+                              concat_compression=True)
+        tarobj.add("big")
+        tarobj.add("small")
+        tarobj.add("small2")
+        tarobj.close()
+
+        assert os.path.exists("sample.tar.gz")
+
+        # remove the originals so extraction really recreates them
+        os.unlink("big")
+        os.unlink("small")
+        os.unlink("small2")
+
+        # zcat handles concatenated gzip streams transparently; the
+        # output must therefore be a plain tar readable in "r" mode
+        os.system("zcat sample.tar.gz > sample.tar")
+        tarobj = TarFile.open("sample.tar",
+                              mode="r")
+        tarobj.extractall()
+        tarobj.close()
+
+        # every file must be back with identical content
+        for key, value in hash.iteritems():
+            assert os.path.exists(key)
+            assert value == self.md5sum(key)
+
+    def test_multiple_files_concat_extract(self):
+        """
+        Create a tar file with multiple files inside, using concat
+        compression mode, then decompress and extract it with this
+        tarfile module itself (mode "r#gz") rather than external zcat.
+        """
+
+        # create sample files of different sizes and remember their md5s
+        hash = dict()
+        hash["big"] = self.create_file("big", 50000)
+        hash["small"] = self.create_file("small", 100)
+        hash["small2"] = self.create_file("small2", 354)
+
+        # create the tar file with concat compression: each added file
+        # becomes a separate gzip stream concatenated in sample.tar.gz
+        tarobj = TarFile.open("sample.tar.gz",
+                              mode="w#gz",
+                              concat_compression=True)
+        tarobj.add("big")
+        tarobj.add("small")
+        tarobj.add("small2")
+        tarobj.close()
+
+        assert os.path.exists("sample.tar.gz")
+
+        # remove the originals so extraction really recreates them
+        os.unlink("big")
+        os.unlink("small")
+        os.unlink("small2")
+
+        # reopen in concat-read mode; the reader must hop across the
+        # per-file gzip stream boundaries and extract everything
+        tarobj = TarFile.open("sample.tar.gz",
+                              mode="r#gz")
+        tarobj.extractall()
+        tarobj.close()
+
+        # every file must be back with identical content
+        for key, value in hash.iteritems():
+            assert os.path.exists(key)
+            assert value == self.md5sum(key)