From: Eduardo Robles Elvira Date: Wed, 31 Jul 2013 10:05:54 +0000 (+0200) Subject: fixing bug extracting tarfiles from index offsets when using multivol X-Git-Tag: v2.2~136 X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=b8fc2f5d43480fd7c878b58c6f877683ce150874;p=python-delta-tar fixing bug extracting tarfiles from index offsets when using multivol --- diff --git a/deltatar/deltatar.py b/deltatar/deltatar.py index d8feb27..489523f 100644 --- a/deltatar/deltatar.py +++ b/deltatar/deltatar.py @@ -365,7 +365,7 @@ class DeltaTar(object): stat['volume'] = self.vol_no # backup file - tarobj.add(path) + tarobj.add(path, recursive=False) # retrieve file offset stat['offset'] = tarobj.get_last_member_offset() @@ -566,19 +566,23 @@ class DeltaTar(object): # seek tarfile if needed offset = j.get('offset', -1) - if vol_fd.tell() != offset: - vol_fd.seek(offset) + if tarobj: + member = tarobj.next() + if member.path != j['path']: + # force a seek and reopen + tarobj.close() + tarobj = None - # open tarfile if needed if not tarobj: + vol_fd.seek(offset) tarobj = tarfile.open(mode="r" + self.mode, fileobj=vol_fd, format=tarfile.GNU_FORMAT, concat_compression='#gz' in self.mode, password=self.password, new_volume_handler=new_volume_handler) + member = tarobj.next() # finally, restore the file - member = tarobj.next() tarobj.extract(member) os.chdir(cwd) diff --git a/deltatar/tarfile.py b/deltatar/tarfile.py index d698c9f..779bc60 100644 --- a/deltatar/tarfile.py +++ b/deltatar/tarfile.py @@ -491,14 +491,14 @@ class _Stream: self.name = self.name[:-3] self.__write(self.name + NUL) - def new_compression_block(self, set_last_block_offset=False): + def new_compression_block(self): ''' Used to notify a new tar block is coming to create a new zip block ''' if self.mode != "w": raise CompressionError("new compression blocks can only be added in mode 'w'") if self.comptype == "gz": - self._new_gz_block(set_last_block_offset) + self._new_gz_block(True) else: raise 
CompressionError("Concat compression only available for comptype 'gz'") @@ -2064,10 +2064,7 @@ class TarFile(object): but when there's encryption or concat compression going on it's more complicated than that. """ - if self.concat_compression: - return self.fileobj.last_block_offset - else: - return self.last_block_offset + return self.last_block_offset def getnames(self): """Return the members of the archive as a list of their names. It has @@ -2285,7 +2282,8 @@ class TarFile(object): tarinfo = copy.copy(tarinfo) if self.concat_compression: - self.fileobj.new_compression_block(set_last_block_offset=True) + self.fileobj.new_compression_block() + self.last_block_offset = self.fileobj.last_block_offset else: self.last_block_offset = self.fileobj.tell() diff --git a/testing/test_concat_compress.py b/testing/test_concat_compress.py index 3eadfad..b2e1238 100644 --- a/testing/test_concat_compress.py +++ b/testing/test_concat_compress.py @@ -142,6 +142,56 @@ class ConcatCompressTest(BaseTest): assert not os.path.exists("big") assert not os.path.exists("small2") + def test_concat_extract_one_fileobj_multivol(self): + ''' + Create a tar file with multiple files inside and multiple volumes, + using concat compression mode, then decompress a file spanning two + volumes with the tarfile module using the fileobj parameter. 
+ ''' + + # create the content of the file to compress and hash it + hash = dict() + hash["small"] = self.create_file("small", 100000) + hash["big"] = self.create_file("big", 1200000) + + # create the tar file with volumes + tarobj = TarFile.open("sample.tar.gz", + mode="w#gz", + concat_compression=True, + max_volume_size=1000000, + new_volume_handler=new_volume_handler) + tarobj.add("small") + tarobj.add("big") + pos = tarobj.get_last_member_offset() + tarobj.close() + + assert os.path.exists("sample.tar.gz") + + os.unlink("big") + os.unlink("small") + + def new_volume_handler_fo(tarobj, base_name, volume_number): + ''' + Handles the new volumes, ignoring base_name as it'll be None because + we'll be using a seek fileobj. + ''' + volume_path = "sample.tar.gz.%d" % volume_number + tarobj.open_volume(volume_path) + + # extract only the "small" file + fo = open("sample.tar.gz", 'r') + fo.seek(pos) + tarobj = TarFile.open(mode="r#gz", fileobj=fo, + concat_compression=True, + new_volume_handler=new_volume_handler_fo) + tarobj.extract(tarobj.next()) + tarobj.close() + assert os.path.exists("big") + assert hash['big'] == self.md5sum("big") + + # we didn't extract the other files + assert not os.path.exists("small") + def test_multiple_files_zcat_extract(self): ''' Create a tar file with only multiple files inside, using concat diff --git a/testing/test_deltatar.py b/testing/test_deltatar.py index dc132f4..3dbf9a2 100644 --- a/testing/test_deltatar.py +++ b/testing/test_deltatar.py @@ -19,7 +19,12 @@ import os import shutil import logging +import binascii +import json +from datetime import datetime +from functools import partial +from deltatar.tarfile import TarFile, GNU_FORMAT from deltatar.deltatar import DeltaTar import filesplit @@ -39,7 +44,7 @@ class DeltaTarTest(BaseTest): ''' Create base test data ''' - os.system('rm -rf source_dir backup_dir') + os.system('rm -rf source_dir source_dir2 backup_dir huge') os.makedirs('source_dir/test/test2') self.hash = dict() 
self.hash["source_dir/test/test2"] = '' @@ -55,9 +60,9 @@ class DeltaTarTest(BaseTest): ''' Remove temporal files created by unit tests ''' - os.system("rm -rf source_dir backup_dir") + os.system("rm -rf source_dir source_dir2 backup_dir huge") - def test_create_simple_full_backup(self): + def test_restore_simple_full_backup(self): ''' Creates a full backup without any filtering and restores it. ''' @@ -87,8 +92,6 @@ class DeltaTarTest(BaseTest): ''' Creates a full backup and checks the index' checksum of files ''' - import binascii - import json deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD, logger=self.consoleLogger) @@ -121,16 +124,24 @@ class DeltaTarTest(BaseTest): elif began_list: crc = binascii.crc32(l, crc) & 0xffffffff - def test_create_multivol(self): + + def test_restore_multivol(self): ''' - Creates a full backup without any filtering with multiple volumes. + Creates a full backup without any filtering with multiple volumes and + restore it. ''' deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD, logger=self.consoleLogger) + + self.hash = dict() + os.makedirs('source_dir2') + self.hash["source_dir2/big"] = self.create_file("source_dir2/big", 100000) + self.hash["source_dir2/huge"] = self.create_file("source_dir2/huge", 1200000) + # create first backup deltatar.create_full_backup( - source_path="source_dir", + source_path="source_dir2", backup_path="backup_dir", max_volume_size=1) @@ -139,16 +150,14 @@ class DeltaTarTest(BaseTest): deltatar.volume_name_func("backup_dir", True, 0))) assert os.path.exists(os.path.join("backup_dir", deltatar.volume_name_func("backup_dir", True, 1))) - assert os.path.exists(os.path.join("backup_dir", - deltatar.volume_name_func("backup_dir", True, 2))) - shutil.rmtree("source_dir") + shutil.rmtree("source_dir2") tar_filename = deltatar.volume_name_func('backup_dir', True, 0) tar_path = os.path.join("backup_dir", tar_filename) # this should automatically restore all volumes - 
deltatar.restore_backup(target_path="source_dir", + deltatar.restore_backup(target_path="source_dir2", backup_tar_path=tar_path) for key, value in self.hash.iteritems(): @@ -156,6 +165,69 @@ class DeltaTarTest(BaseTest): if value: assert value == self.md5sum(key) + def test_restore_multivol_manual_from_index(self): + ''' + Creates a full backup without any filtering with multiple volumes and + restore it. + ''' + # this test only works for uncompressed or concat compressed modes + if self.MODE.startswith(':') or self.MODE.startswith('|'): + return + + deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD, + logger=self.consoleLogger) + + + self.hash = dict() + os.makedirs('source_dir2') + self.hash["source_dir2/big"] = self.create_file("source_dir2/big", 100000) + self.hash["source_dir2/huge"] = self.create_file("source_dir2/huge", 1200000) + + # create first backup + deltatar.create_full_backup( + source_path="source_dir2", + backup_path="backup_dir", + max_volume_size=1) + + assert os.path.exists("backup_dir") + assert os.path.exists(os.path.join("backup_dir", + deltatar.volume_name_func("backup_dir", True, 0))) + assert os.path.exists(os.path.join("backup_dir", + deltatar.volume_name_func("backup_dir", True, 1))) + + shutil.rmtree("source_dir2") + + tar_filename = deltatar.volume_name_func('backup_dir', True, 0) + tar_path = os.path.join("backup_dir", tar_filename) + + index_filename = deltatar.index_name_func(True) + index_path = os.path.join("backup_dir", index_filename) + + # this should automatically restore the huge file + f = open(index_path, 'r') + for l in f.readline(): + data = json.loads(f.readline()) + if data.get('type', '') == 'file' and data['path'] == "./huge": + offset = data['offset'] + break + + fo = open(tar_path, 'r') + fo.seek(offset) + def new_volume_handler(mode, tarobj, base_name, volume_number): + tarobj.open_volume(datetime.now().strftime( + "backup_dir/bfull-%y-%m-%d-%H%M-002.tar") +\ + 
DeltaTar._DeltaTar__file_extensions_dict[mode]) + new_volume_handler = partial(new_volume_handler, self.MODE) + + tarobj = TarFile.open(mode="r" + self.MODE, fileobj=fo, + concat_compression=True, + new_volume_handler=new_volume_handler, + password=self.PASSWORD) + tarobj.extract(tarobj.next()) + tarobj.close() + assert self.hash['source_dir2/huge'] == self.md5sum('huge') + + os.unlink("huge") def test_restore_from_index(self): ''' @@ -187,6 +259,36 @@ class DeltaTarTest(BaseTest): if value: assert value == self.md5sum(key) + def test_restore_multivol_from_index(self): + ''' + Restores a full multivolume backup using an index file. + ''' + # this test only works for uncompressed or concat compressed modes + if self.MODE.startswith(':') or self.MODE.startswith('|'): + return + + deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD, + logger=self.consoleLogger) + + # create first backup + deltatar.create_full_backup( + source_path="source_dir", + backup_path="backup_dir", + max_volume_size=1) + + shutil.rmtree("source_dir") + + # this should automatically restore all volumes + index_filename = deltatar.index_name_func(True) + index_path = os.path.join("backup_dir", index_filename) + + deltatar.restore_backup(target_path="source_dir", + backup_indexes_paths=[index_path]) + + for key, value in self.hash.iteritems(): + assert os.path.exists(key) + if value: + assert value == self.md5sum(key) class DeltaTar2Test(DeltaTarTest): '''