__author__ = "Lars Gustäbel (lars@gustaebel.de)"
__date__ = "$Date$"
__cvsid__ = "$Id$"
-__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
+__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend, Eduardo Robles."
#---------
# Imports
self.bufsize = bufsize
self.buf = ""
self.pos = 0L
+ self.concat_pos = 0L
self.closed = False
if comptype == "gz":
self.name = self.name[:-3]
self.__write(self.name + NUL)
+    def new_compression_block(self):
+        '''
+        Notify the stream that a new tar member is about to be written,
+        so a fresh compression block (gzip member) is started for it.
+
+        Raises CompressionError if the stream is not opened for writing,
+        or if the compression type does not support concatenation.
+        '''
+        # Only meaningful while writing; there is nothing to delimit
+        # when reading.
+        if self.mode != "w":
+            raise CompressionError("new compression blocks can only be added in mode 'w'")
+
+        # gzip members can simply be concatenated and still decompress
+        # as one stream (e.g. with zcat); other comptypes cannot.
+        if self.comptype == "gz":
+            self._new_gz_block()
+        else:
+            raise CompressionError("Concat compression only available for comptype 'gz'")
+
+ def _new_gz_block(self):
+ '''
+ Add a new gzip block, closing last one
+ '''
+ import zlib
+ self.close(close_fileobj=False)
+ self.closed = False
+ self.concat_pos = 0L
+ self.crc = zlib.crc32("") & 0xffffffffL
+ self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
+ -self.zlib.MAX_WBITS,
+ self.zlib.DEF_MEM_LEVEL,
+ 0)
+ timestamp = struct.pack("<L", long(time.time()))
+ self.__write("\037\213\010\000%s\002\377" % timestamp)
+
    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
+            # The gzip trailer carries the CRC of the *uncompressed*
+            # data, so update it before compressing.
            self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
+        # pos counts bytes over the stream's whole lifetime; concat_pos
+        # only those of the current compression block (gzip member).
        self.pos += len(s)
+        self.concat_pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)
self.fileobj.write(self.buf[:self.bufsize])
self.buf = self.buf[self.bufsize:]
-    def close(self):
+    def close(self, close_fileobj=True):
        """Close the _Stream object. No operation should be
        done on it afterwards.
+
+        If close_fileobj is False, the underlying file object is left
+        open so that another compression block can be appended
+        afterwards (used by _new_gz_block).
        """
        # To avoid irksome warnings from the `struct` module, force
        # it to look positive on all boxes.
        self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
-        self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
+        # gzip ISIZE field: size of the uncompressed data of *this*
+        # member (mod 2**32) — hence concat_pos rather than the global pos.
+        self.fileobj.write(struct.pack("<L", self.concat_pos & 0xffffFFFFL))
-        if not self._extfileobj:
+        if close_fileobj and not self._extfileobj:
            self.fileobj.close()
        self.closed = True
fileobject = ExFileObject # The default ExFileObject class to use.
+    concat_compression = False # Used to store each archived file in its
+                               # own compression block, for robustness.
+
def __init__(self, name=None, mode="r", fileobj=None, format=None,
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
errors=None, pax_headers=None, debug=None, errorlevel=None,
- max_volume_size=None, new_volume_handler=None):
+ max_volume_size=None, new_volume_handler=None, concat_compression=False):
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
read from an existing archive, 'a' to append data to an existing
file or 'w' to create a new file overwriting an existing one. `mode'
if len(mode) > 1 or mode not in "raw":
raise ValueError("mode must be 'r', 'a' or 'w'")
self.mode = mode
+ self.concat_compression = concat_compression
self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
if not fileobj:
t._extfileobj = False
return t
+ elif "#" in mode:
+ filemode, comptype = mode.split("#", 1)
+ filemode = filemode or "r"
+ comptype = comptype
+
+ if filemode not in "rw":
+ raise ValueError("mode must be 'r' or 'w'")
+
+ if comptype not in ["gz"]:
+ raise ValueError("comptype must be 'gz'")
+
+ kwargs['concat_compression'] = True
+
+ t = cls(name, filemode,
+ _Stream(name, filemode, comptype, fileobj, bufsize),
+ **kwargs)
+ t._extfileobj = False
+ return t
+
elif mode in "aw":
return cls.taropen(name, mode, fileobj, **kwargs)
'''
size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
# limit size left to a discrete number of blocks, because we won't
- # write only half a block when writting the end of a volume
+ # write only half a block when writing the end of a volume
# and filling with zeros
blocks, remainder = divmod(size_left, BLOCKSIZE)
return blocks*BLOCKSIZE
self._check("aw")
tarinfo = copy.copy(tarinfo)
+ if self.concat_compression:
+ self.fileobj.new_compression_block()
buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
self.fileobj.write(buf)
--- /dev/null
+import os, unittest, hashlib, string
+
+from deltatar.tarfile import TarFile, PAX_FORMAT, GNU_FORMAT, BLOCKSIZE, _Stream, RECORDSIZE
+
+
+class ConcatCompressTest(unittest.TestCase):
+    """
+    Test concatenated compression in tarfiles
+    """
+
+    def tearDown(self):
+        '''
+        Remove temporary files created by unit tests
+        '''
+        os.system("rm -rf big small small2 sample.tar*")
+
+    def create_file(self, path, length):
+        '''
+        Creates a file with some gibberish inside, returning the md5sum of
+        that file. File path and length are specified as function arguments.
+        '''
+        f = open(path, 'w')
+        # Repeat the alphabet/digit pattern until it is at least `length`
+        # characters long, then truncate.  (Python 2: `/` is integer
+        # division here.)
+        s = string.lowercase + string.digits + "\n"
+        if len(s) < length:
+            s += s*(length/len(s))
+        data = s[:length]
+        f.write(data)
+        f.close()
+        return self.md5sum(path)
+
+    def md5sum(self, filename):
+        '''
+        Returns the md5sum of a file specified by its filename/path
+        '''
+        md5 = hashlib.md5()
+        # Read in fixed-size chunks so large files never have to fit
+        # in memory at once.
+        with open(filename,'rb') as f:
+            for chunk in iter(lambda: f.read(128*md5.block_size), b''):
+                md5.update(chunk)
+        return md5.hexdigest()
+
+    def test_zip_compress_concat(self):
+        """
+        Create a tar file with only one file inside, using concat compression
+        mode. Then decompress it with zcat and untar it with gnu tar.
+        """
+
+        # create the content of the file to compress and hash it
+        hash = self.create_file("big", 50000)
+
+        # create the tar file using concatenated per-member compression
+        tarobj = TarFile.open("sample.tar.gz",
+                              mode="w#gz",
+                              format=GNU_FORMAT,
+                              concat_compression=True)
+        tarobj.add("big")
+        tarobj.close()
+        os.unlink("big")
+
+        # extract with normal tar and check output; zcat transparently
+        # handles the concatenated gzip members
+        os.system("zcat sample.tar.gz > sample.tar")
+        os.system("tar xf sample.tar")
+        assert os.path.exists("big")
+        assert hash == self.md5sum("big")