From: Eduardo Robles Elvira Date: Wed, 3 Jul 2013 11:25:09 +0000 (+0200) Subject: adding initial concat compression support and a simple unit test X-Git-Tag: v2.2~175 X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=5fdff89f4d9fa28e6b210d40d389680072651eb7;p=python-delta-tar adding initial concat compression support and a simple unit test --- diff --git a/deltatar/tarfile.py b/deltatar/tarfile.py index 5d356e3..e7fc6da 100644 --- a/deltatar/tarfile.py +++ b/deltatar/tarfile.py @@ -37,7 +37,7 @@ version = "0.9.0" __author__ = "Lars Gustäbel (lars@gustaebel.de)" __date__ = "$Date$" __cvsid__ = "$Id$" -__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend." +__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend, Eduardo Robles." #--------- # Imports @@ -418,6 +418,7 @@ class _Stream: self.bufsize = bufsize self.buf = "" self.pos = 0L + self.concat_pos = 0L self.closed = False if comptype == "gz": @@ -462,12 +463,41 @@ class _Stream: self.name = self.name[:-3] self.__write(self.name + NUL) + def new_compression_block(self): + ''' + Used to notify a new tar block is coming to create a new zip block + ''' + if self.mode != "w": + raise CompressionError("new compression blocks can only be added in mode 'w'") + + if self.comptype == "gz": + self._new_gz_block() + else: + raise CompressionError("Concat compression only available for comptype 'gz'") + + def _new_gz_block(self): + ''' + Add a new gzip block, closing last one + ''' + import zlib + self.close(close_fileobj=False) + self.closed = False + self.concat_pos = 0L + self.crc = zlib.crc32("") & 0xffffffffL + self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, + -self.zlib.MAX_WBITS, + self.zlib.DEF_MEM_LEVEL, + 0) + timestamp = struct.pack(" 1 or mode not in "raw": raise ValueError("mode must be 'r', 'a' or 'w'") self.mode = mode + self.concat_compression = concat_compression self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode] if not fileobj: @@ -1746,6 +1780,25 @@ class TarFile(object): t._extfileobj = False return t + elif "#" in mode: + filemode, comptype = mode.split("#", 1) + filemode = filemode or "r" + comptype = comptype + + if filemode not in "rw": + raise ValueError("mode must be 'r' or 'w'") + + if comptype not in ["gz"]: + raise ValueError("comptype must be 'gz'") + + kwargs['concat_compression'] = True + + t = cls(name, filemode, + _Stream(name, filemode, comptype, fileobj, bufsize), + **kwargs) + t._extfileobj = False + return t + elif mode in "aw": return cls.taropen(name, mode, fileobj, **kwargs) @@ -2061,7 +2114,7 @@ class TarFile(object): ''' size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset # limit size left to a discrete number of blocks, because we won't - # write only half a block when writting the end of a volume + # write only half a block when writing the end of a volume # and filling with zeros blocks, remainder = divmod(size_left, BLOCKSIZE) return blocks*BLOCKSIZE @@ -2076,6 +2129,8 @@ class TarFile(object): self._check("aw") tarinfo = copy.copy(tarinfo) + if self.concat_compression: + self.fileobj.new_compression_block() buf = tarinfo.tobuf(self.format, self.encoding, self.errors) self.fileobj.write(buf) diff --git a/runtests.py b/runtests.py index 4610920..9e5f8bb 100644 --- a/runtests.py +++ b/runtests.py @@ -3,6 +3,7 @@ import unittest from testing.test_multivol import MultivolGnuFormatTest, MultivolPaxFormatTest +from testing.test_concat_compress import ConcatCompressTest if __name__ == "__main__": unittest.main() \ No newline at end of file diff --git a/testing/test_concat_compress.py b/testing/test_concat_compress.py new file mode 100644 index 0000000..f125aa2 --- /dev/null +++ b/testing/test_concat_compress.py @@ -0,0 +1,63 @@ +import os, unittest, hashlib, string + +from deltatar.tarfile import TarFile, PAX_FORMAT, GNU_FORMAT, BLOCKSIZE, _Stream, RECORDSIZE + + +class ConcatCompressTest(unittest.TestCase): + """ + Test concatenated compression in tarfiles + """ + + def tearDown(self): + ''' + Remove temporal files created by unit tests + ''' + os.system("rm -rf big small small2 sample.tar*") + + def create_file(self, path, length): + ''' + Creates a file with some gibberish inside, returning the md5sum of that + file. File path and length are specified as function arguments. + ''' + f = open(path, 'w') + s = string.lowercase + string.digits + "\n" + if len(s) < length: + s += s*(length/len(s)) + data = s[:length] + f.write(data) + f.close() + return self.md5sum(path) + + def md5sum(self, filename): + ''' + Returns the md5sum of a file specified by its filename/path + ''' + md5 = hashlib.md5() + with open(filename,'rb') as f: + for chunk in iter(lambda: f.read(128*md5.block_size), b''): + md5.update(chunk) + return md5.hexdigest() + + def test_zip_compress_concat(self): + """ + Create a tar file with only one file inside, using concat compression + mode. Then decompress it with zcat and untar it with gnu tar. + """ + + # create the content of the file to compress and hash it + hash = self.create_file("big", 50000) + + # create the tar file with volumes + tarobj = TarFile.open("sample.tar.gz", + mode="w#gz", + format=GNU_FORMAT, + concat_compression=True) + tarobj.add("big") + tarobj.close() + os.unlink("big") + + # extract with normal tar and check output + os.system("zcat sample.tar.gz > sample.tar") + os.system("tar xf sample.tar") + assert os.path.exists("big") + assert hash == self.md5sum("big")