From: Eduardo Robles Elvira
Date: Tue, 18 Jun 2013 10:22:36 +0000 (+0200)
Subject: adding unit tests for tarfile and fixing some cases which were not working
X-Git-Tag: v2.2~189
X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=ae48acc8f50652cd97ccb38e6de304e8f9f152ec;p=python-delta-tar

adding unit tests for tarfile and fixing some cases which were not working
---

diff --git a/deltatar/__init__.py b/deltatar/__init__.py
new file mode 100644
index 0000000..8d1c8b6
--- /dev/null
+++ b/deltatar/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tarfile.py b/deltatar/tarfile.py
similarity index 98%
rename from tarfile.py
rename to deltatar/tarfile.py
index 60c9dde..9e146a7 100644
--- a/tarfile.py
+++ b/deltatar/tarfile.py
@@ -1584,6 +1584,11 @@ class TarFile(object):
             self.errorlevel = errorlevel
 
         # Init datastructures.
+        if max_volume_size and max_volume_size < 3*BLOCKSIZE:
+            raise ValueError("max_volume_size needs to be at least %d" % (3*BLOCKSIZE))
+        if max_volume_size and not callable(new_volume_handler):
+            raise ValueError("new_volume_handler needs to be set and be callable for multivolume support")
+
         self.max_volume_size = max_volume_size
         self.new_volume_handler = new_volume_handler
         self.closed = False
@@ -2026,6 +2031,17 @@ class TarFile(object):
         else:
             self.addfile(tarinfo)
 
+    def _size_left(self):
+        '''
+        Calculates size left, assumes self.max_volume_size is set
+        '''
+        size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
+        # limit size left to a discrete number of blocks, because we won't
+        # write only half a block when writing the end of a volume
+        # and filling with zeros
+        blocks, remainder = divmod(size_left, BLOCKSIZE)
+        return blocks*BLOCKSIZE
+
     def addfile(self, tarinfo, fileobj=None):
         """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
            given, tarinfo.size bytes are read from it and added to the archive.
@@ -2049,7 +2065,7 @@ class TarFile(object):
 
         # handle multivolume support
         if self.max_volume_size:
-            size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
+            size_left = self._size_left()
             max_size_to_write = min(size_left, tarinfo.size - tarinfo.offset_data)
         else:
             size_left = max_size_to_write = tarinfo.size
@@ -2065,17 +2081,22 @@ class TarFile(object):
             # going to be a file splitted in multiple volumes.
             # if file is going to be split in multiple volumes, having a
             # remainder means that there's no more space left for a block, so
-            # we already need to create a new volume
-            if max_size_to_write == tarinfo.size and remainder > 0:
+            # we already need to create a new volume.
+            if remainder > 0:
                 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
                 blocks += 1
 
+            # we already assured previously that if we are doing multivolume,
+            # there's not going to be a remainder
+            if self.max_volume_size and max_size_to_write == size_left:
+                assert remainder == 0
+
             self.offset += blocks * BLOCKSIZE
+            size_left -= blocks * BLOCKSIZE
             tarinfo.offset_data += blocks * BLOCKSIZE
-            size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
 
             # check if creating a new volume is needed
-            if size_left <= BLOCKSIZE:
+            if self.max_volume_size and size_left < BLOCKSIZE:
                 tarinfo.type = GNUTYPE_MULTIVOL
 
                 if not self.new_volume_handler or\
@@ -2093,7 +2114,7 @@ class TarFile(object):
                 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
                 self.offset += len(buf)
                 self.fileobj.write(buf)
-                size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
+                size_left = self._size_left()
                 max_size_to_write = min(size_left, tarinfo.size - tarinfo.offset_data)
 
         self.members.append(tarinfo)
diff --git a/runtests.py b/runtests.py
new file mode 100644
index 0000000..d07a277
--- /dev/null
+++ b/runtests.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+
+import unittest
+
+from testing.test_multivol import MultivolTest
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff --git a/tarfile_multivol_example.py b/tarfile_multivol_example.py
deleted file mode 100644
index a902bca..0000000
--- a/tarfile_multivol_example.py
+++ /dev/null
@@ -1,47 +0,0 @@
-
-'''
-when reading, the file being read is not going to fail reading because tar will
-have writen the tar file at appropiate sizes. so it's transparent for _Stream
-
-when writing, it will tarobj who will notice when that the file is too big, and
-thus it will be tarobj job to close the current stream and call to
-new_volume_handler before continue using stream for writing. But it will be
-still transparent from the stream object POV.
-
-
-In the case of restarting gzip compression with #gz:
-
-For writing it will be tarobj job to stop writing current file and tell the
-_Stream object to handle the new file event. So it will be _Stream job to do
-that.
-
-For reading it will be tarobj job to notice the end of a file when reading, and
-call to _Stream object to handle the new file event, in this case for reading.
-
-'''
-
-from tarfile import TarFile
-
-def new_volume_handler(tarobj, base_name, volume_number):
-    volume_path = "%s.%d" % (base_name, volume_number)
-    print "new volume: ", volume_path
-    tarobj.open_volume(volume_path)
-
-
-# write
-tar = TarFile.open("sample.tar",
-                   mode="w",
-                   max_volume_size=(1024**2)*4,
-                   new_volume_handler=new_volume_handler)
-tar.add("big")
-tar.close()
-
-## read
-#tar = tarfile.open("sample.tar.gz",
-               #mode="r#gz",
-               #new_volume_handler=new_volume)
-#for name in ["foo", "bar", "quux"]:
-    #tar.add(name)
-#tar.close()
-
-# when creating a
\ No newline at end of file
diff --git a/testing/__init__.py b/testing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/testing/test_multivol.py b/testing/test_multivol.py
new file mode 100644
index 0000000..08b1f1e
--- /dev/null
+++ b/testing/test_multivol.py
@@ -0,0 +1,159 @@
+import sys, os, unittest, hashlib, random, string
+
+from deltatar.tarfile import TarFile
+
+def new_volume_handler(tarobj, base_name, volume_number):
+    volume_path = "%s.%d" % (base_name, volume_number)
+    tarobj.open_volume(volume_path)
+
+
+class MultivolTest(unittest.TestCase):
+    """Test multivolume support in tarfile"""
+
+    def tearDown(self):
+        os.system("rm -rf big small small2 sample.tar*")
+
+    def create_file(self, path, data):
+        """Write the string data to the file at path"""
+        with open(path, 'w') as f:
+            f.write(data)
+
+    def create_random_file(self, path, length):
+        """Write length random lowercase letters and digits to path"""
+        s = string.ascii_lowercase + string.digits
+        data = ''.join(random.choice(s) for _ in range(length))
+        with open(path, 'w') as f:
+            f.write(data)
+
+    def md5sum(self, filename):
+        md5 = hashlib.md5()
+        with open(filename,'rb') as f:
+            for chunk in iter(lambda: f.read(128*md5.block_size), b''):
+                md5.update(chunk)
+        return md5.hexdigest()
+
+    def test_no_volume(self):
+        """Test normal tarfile creation, no volumes """
+
+        # create the content of the file to compress and hash it
+        s = "hello" * 10000
+        self.assertEqual(len(s), 50000)
+        self.create_file("big", s)
+        digest = self.md5sum("big")
+
+        # create the tar file without volumes
+        tarobj = TarFile.open("sample.tar",
+                              mode="w")
+        tarobj.add("big")
+        tarobj.close()
+
+        # check that a single tar file and no extra volume was created
+        self.assertTrue(os.path.exists("sample.tar"))
+        self.assertFalse(os.path.exists("sample.tar.1"))
+
+        os.unlink("big")
+        self.assertFalse(os.path.exists("big"))
+
+        # extract and check
+        os.system("tar xfM sample.tar")
+        self.assertTrue(os.path.exists("big"))
+        self.assertEqual(digest, self.md5sum("big"))
+
+    def test_volume_creation1(self):
+        """Test volumes creation"""
+
+        # create the content of the file to compress and hash it
+        s = "hello" * 10000
+        self.assertEqual(len(s), 50000)
+        self.create_file("big", s)
+        digest = self.md5sum("big")
+
+        # create the tar file with volumes
+        tarobj = TarFile.open("sample.tar",
+                              mode="w",
+                              max_volume_size=30000,
+                              new_volume_handler=new_volume_handler)
+        tarobj.add("big")
+        tarobj.close()
+
+        # check that the tar volumes were correctly created
+        self.assertTrue(os.path.exists("sample.tar"))
+        self.assertTrue(os.path.exists("sample.tar.1"))
+        self.assertFalse(os.path.exists("sample.tar.2"))
+
+        os.unlink("big")
+        self.assertFalse(os.path.exists("big"))
+
+        # extract with normal tar and check output
+        os.system("tar xfM sample.tar --file=sample.tar.1")
+        self.assertTrue(os.path.exists("big"))
+        self.assertEqual(digest, self.md5sum("big"))
+
+    def test_volume_creation2(self):
+        """Test volumes creation with two volumes"""
+
+        # create the content of the file to compress and hash it
+        s = "hello" * 10000
+        self.assertEqual(len(s), 50000)
+        self.create_file("big", s)
+        digest = self.md5sum("big")
+
+        # create the tar file with volumes
+        tarobj = TarFile.open("sample.tar",
+                              mode="w",
+                              max_volume_size=20000,
+                              new_volume_handler=new_volume_handler)
+        tarobj.add("big")
+        tarobj.close()
+
+        # check that the tar volumes were correctly created
+        self.assertTrue(os.path.exists("sample.tar"))
+        self.assertTrue(os.path.exists("sample.tar.1"))
+        self.assertTrue(os.path.exists("sample.tar.2"))
+        self.assertFalse(os.path.exists("sample.tar.3"))
+
+        os.unlink("big")
+        self.assertFalse(os.path.exists("big"))
+
+        # extract with normal tar and check output
+        os.system("tar xfM sample.tar --file=sample.tar.1 --file=sample.tar.2")
+        self.assertTrue(os.path.exists("big"))
+        self.assertEqual(digest, self.md5sum("big"))
+
+    def test_multiple_files_volume(self):
+        """Test volumes creation with several files in the archive"""
+
+        # create sample data
+        hashes = dict()
+        self.create_random_file("big", 50000)
+        hashes["big"] = self.md5sum("big")
+        self.create_random_file("small", 100)
+        hashes["small"] = self.md5sum("small")
+        self.create_random_file("small2", 354)
+        hashes["small2"] = self.md5sum("small2")
+
+        # create the tar file with volumes
+        tarobj = TarFile.open("sample.tar",
+                              mode="w",
+                              max_volume_size=20000,
+                              new_volume_handler=new_volume_handler)
+        tarobj.add("big")
+        tarobj.add("small")
+        tarobj.add("small2")
+        tarobj.close()
+
+        # check that the tar volumes were correctly created
+        self.assertTrue(os.path.exists("sample.tar"))
+        self.assertTrue(os.path.exists("sample.tar.1"))
+        self.assertTrue(os.path.exists("sample.tar.2"))
+        self.assertFalse(os.path.exists("sample.tar.3"))
+
+        os.unlink("big")
+        os.unlink("small")
+        os.unlink("small2")
+
+        # extract with normal tar and check output
+        os.system("tar xfM sample.tar --file=sample.tar.1 --file=sample.tar.2")
+        for name, digest in hashes.items():
+            self.assertTrue(os.path.exists(name))
+            self.assertEqual(digest, self.md5sum(name))