self.errorlevel = errorlevel
# Init datastructures.
+ if max_volume_size and max_volume_size < 3*BLOCKSIZE:
+ raise ValueError("max_volume_size needs to be at least %d" % (3*BLOCKSIZE))
+ if max_volume_size and not callable(new_volume_handler):
+ raise ValueError("new_volume_handler needs to be set and be callable for multivolume support")
+
self.max_volume_size = max_volume_size
self.new_volume_handler = new_volume_handler
self.closed = False
else:
self.addfile(tarinfo)
+ def _size_left(self):
+ '''
+ Calculates size left, assumes self.max_volume_size is set
+ '''
+ size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
+ # limit size left to a discrete number of blocks, because we won't
+ # write only half a block when writing the end of a volume
+ # and filling with zeros
+ blocks, remainder = divmod(size_left, BLOCKSIZE)
+ return blocks*BLOCKSIZE
+
def addfile(self, tarinfo, fileobj=None):
"""Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
given, tarinfo.size bytes are read from it and added to the archive.
# handle multivolume support
if self.max_volume_size:
- size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
+ size_left = self._size_left()
max_size_to_write = min(size_left, tarinfo.size - tarinfo.offset_data)
else:
size_left = max_size_to_write = tarinfo.size
# going to be a file splitted in multiple volumes.
# if file is going to be split in multiple volumes, having a
# remainder means that there's no more space left for a block, so
- # we already need to create a new volume
- if max_size_to_write == tarinfo.size and remainder > 0:
+ # we already need to create a new volume.
+ if remainder > 0:
self.fileobj.write(NUL * (BLOCKSIZE - remainder))
blocks += 1
+ # we already assured previously that if we are doing multivolume,
+ # there's not going to be a remainder
+ if self.max_volume_size and max_size_to_write == size_left:
+ assert remainder == 0
+
self.offset += blocks * BLOCKSIZE
+ size_left -= blocks * BLOCKSIZE
tarinfo.offset_data += blocks * BLOCKSIZE
- size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
# check if creating a new volume is needed
- if size_left <= BLOCKSIZE:
+ if self.max_volume_size and size_left < BLOCKSIZE:
tarinfo.type = GNUTYPE_MULTIVOL
if not self.new_volume_handler or\
buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
self.offset += len(buf)
self.fileobj.write(buf)
- size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
+ size_left = self._size_left()
max_size_to_write = min(size_left, tarinfo.size - tarinfo.offset_data)
self.members.append(tarinfo)
+++ /dev/null
-
-'''
-when reading, the file being read is not going to fail reading because tar will
-have writen the tar file at appropiate sizes. so it's transparent for _Stream
-
-when writing, it will tarobj who will notice when that the file is too big, and
-thus it will be tarobj job to close the current stream and call to
-new_volume_handler before continue using stream for writing. But it will be
-still transparent from the stream object POV.
-
-
-In the case of restarting gzip compression with #gz:
-
-For writing it will be tarobj job to stop writing current file and tell the
-_Stream object to handle the new file event. So it will be _Stream job to do
-that.
-
-For reading it will be tarobj job to notice the end of a file when reading, and
-call to _Stream object to handle the new file event, in this case for reading.
-
-'''
-
-from tarfile import TarFile
-
-def new_volume_handler(tarobj, base_name, volume_number):
- volume_path = "%s.%d" % (base_name, volume_number)
- print "new volume: ", volume_path
- tarobj.open_volume(volume_path)
-
-
-# write
-tar = TarFile.open("sample.tar",
- mode="w",
- max_volume_size=(1024**2)*4,
- new_volume_handler=new_volume_handler)
-tar.add("big")
-tar.close()
-
-## read
-#tar = tarfile.open("sample.tar.gz",
- #mode="r#gz",
- #new_volume_handler=new_volume)
-#for name in ["foo", "bar", "quux"]:
- #tar.add(name)
-#tar.close()
-
-# when creating a
\ No newline at end of file
--- /dev/null
+import sys, os, unittest, hashlib, random, string
+
+from deltatar.tarfile import TarFile
+
+def new_volume_handler(tarobj, base_name, volume_number):
+ volume_path = "%s.%d" % (base_name, volume_number)
+ tarobj.open_volume(volume_path)
+
+
+class MultivolTest(unittest.TestCase):
+ """Test multivolume support in tarfile"""
+
+ def tearDown(self):
+ os.system("rm -rf big small small2 sample.tar*")
+
+ def create_file(self, path, data):
+ f = open(path, 'w')
+ f.write(data)
+ f.close()
+
+ def create_random_file(self, path, length):
+ f = open(path, 'w')
+ s = string.lowercase + string.digits
+ data = ''.join(random.choice(s) for i in range(length))
+ f.write(data)
+ f.close()
+
+ def md5sum(self, filename):
+ md5 = hashlib.md5()
+ with open(filename,'rb') as f:
+ for chunk in iter(lambda: f.read(128*md5.block_size), b''):
+ md5.update(chunk)
+ return md5.hexdigest()
+
+ def test_no_volume(self):
+ """Test normal tarfile creation, no volumes """
+
+ # create the content of the file to compress and hash it
+ s = "hello" * 10000
+ assert len(s) == 50000
+ self.create_file("big", s)
+ hash = self.md5sum("big")
+
+ # create the tar file with volumes
+ tarobj = TarFile.open("sample.tar",
+ mode="w")
+ tarobj.add("big")
+ tarobj.close()
+
+ # check that the tar volumes were correctly created
+ assert os.path.exists("sample.tar")
+ assert not os.path.exists("sample.tar.1")
+
+ os.unlink("big")
+ assert not os.path.exists("big")
+
+ # extract and check
+ os.system("tar xfM sample.tar")
+ assert os.path.exists("big")
+ assert hash == self.md5sum("big")
+
+ def test_volume_creation1(self):
+ """Test volumes creation"""
+
+ # create the content of the file to compress and hash it
+ s = "hello" * 10000
+ assert len(s) == 50000
+ self.create_file("big", s)
+ hash = self.md5sum("big")
+
+ # create the tar file with volumes
+ tarobj = TarFile.open("sample.tar",
+ mode="w",
+ max_volume_size=30000,
+ new_volume_handler=new_volume_handler)
+ tarobj.add("big")
+ tarobj.close()
+
+ # check that the tar volumes were correctly created
+ assert os.path.exists("sample.tar")
+ assert os.path.exists("sample.tar.1")
+ assert not os.path.exists("sample.tar.2")
+
+ os.unlink("big")
+ assert not os.path.exists("big")
+
+ # extract with normal tar and check output
+ os.system("tar xfM sample.tar --file=sample.tar.1")
+ assert os.path.exists("big")
+ assert hash == self.md5sum("big")
+
+ def test_volume_creation2(self):
+ """Test volumes creation with two volumes"""
+
+ # create the content of the file to compress and hash it
+ s = "hello" * 10000
+ assert len(s) == 50000
+ self.create_file("big", s)
+ hash = self.md5sum("big")
+
+ # create the tar file with volumes
+ tarobj = TarFile.open("sample.tar",
+ mode="w",
+ max_volume_size=20000,
+ new_volume_handler=new_volume_handler)
+ tarobj.add("big")
+ tarobj.close()
+
+ # check that the tar volumes were correctly created
+ assert os.path.exists("sample.tar")
+ assert os.path.exists("sample.tar.1")
+ assert os.path.exists("sample.tar.2")
+ assert not os.path.exists("sample.tar.3")
+
+ os.unlink("big")
+ assert not os.path.exists("big")
+
+ # extract with normal tar and check output
+ os.system("tar xfM sample.tar --file=sample.tar.1 --file=sample.tar.2")
+ assert os.path.exists("big")
+ assert hash == self.md5sum("big")
+
+ def test_multiple_files_volume(self):
+ # create the content of the file to compress and hash it
+
+ # create sample data
+ hash = dict()
+ self.create_random_file("big", 50000)
+ hash["big"] = self.md5sum("big")
+ self.create_random_file("small", 100)
+ hash["small"] = self.md5sum("small")
+ self.create_random_file("small2", 354)
+ hash["small2"] = self.md5sum("small2")
+
+ # create the tar file with volumes
+ tarobj = TarFile.open("sample.tar",
+ mode="w",
+ max_volume_size=20000,
+ new_volume_handler=new_volume_handler)
+ tarobj.add("big")
+ tarobj.add("small")
+ tarobj.add("small2")
+ tarobj.close()
+
+ # check that the tar volumes were correctly created
+ assert os.path.exists("sample.tar")
+ assert os.path.exists("sample.tar.1")
+ assert os.path.exists("sample.tar.2")
+ assert not os.path.exists("sample.tar.3")
+
+ os.unlink("big")
+ os.unlink("small")
+ os.unlink("small2")
+
+ # extract with normal tar and check output
+ os.system("tar xfM sample.tar --file=sample.tar.1 --file=sample.tar.2")
+ for key, value in hash.iteritems():
+ assert os.path.exists(key)
+ assert value == self.md5sum(key)