fixing bug extracting tarfiles from index offsets when using multivol
authorEduardo Robles Elvira <edulix@wadobo.com>
Wed, 31 Jul 2013 10:05:54 +0000 (12:05 +0200)
committerEduardo Robles Elvira <edulix@wadobo.com>
Wed, 31 Jul 2013 10:05:54 +0000 (12:05 +0200)
deltatar/deltatar.py
deltatar/tarfile.py
testing/test_concat_compress.py
testing/test_deltatar.py

index d8feb27..489523f 100644 (file)
@@ -365,7 +365,7 @@ class DeltaTar(object):
             stat['volume'] = self.vol_no
 
             # backup file
-            tarobj.add(path)
+            tarobj.add(path, recursive=False)
 
             # retrieve file offset
             stat['offset'] = tarobj.get_last_member_offset()
@@ -566,19 +566,23 @@ class DeltaTar(object):
 
                 # seek tarfile if needed
                 offset = j.get('offset', -1)
-                if vol_fd.tell() != offset:
-                    vol_fd.seek(offset)
+                if tarobj:
+                    member = tarobj.next()
+                    if member.path != j['path']:
+                        # force a seek and reopen
+                        tarobj.close()
+                        tarobj = None
 
-                # open tarfile if needed
                 if not tarobj:
+                    vol_fd.seek(offset)
                     tarobj = tarfile.open(mode="r" + self.mode, fileobj=vol_fd,
                                 format=tarfile.GNU_FORMAT,
                                 concat_compression='#gz' in self.mode,
                                 password=self.password,
                                 new_volume_handler=new_volume_handler)
+                    member = tarobj.next()
 
                 # finally, restore the file
-                member = tarobj.next()
                 tarobj.extract(member)
 
             os.chdir(cwd)
index d698c9f..779bc60 100644 (file)
@@ -491,14 +491,14 @@ class _Stream:
             self.name = self.name[:-3]
         self.__write(self.name + NUL)
 
-    def new_compression_block(self, set_last_block_offset=False):
+    def new_compression_block(self):
         '''
         Used to notify a new tar block is coming to create a new zip block
         '''
         if self.mode != "w":
             raise CompressionError("new compression blocks can only be added in mode 'w'")
         if self.comptype == "gz":
-            self._new_gz_block(set_last_block_offset)
+            self._new_gz_block(True)
         else:
             raise CompressionError("Concat compression only available for comptype 'gz'")
 
@@ -2064,10 +2064,7 @@ class TarFile(object):
            but when there's encryption or concat compression going on it's more
            complicated than that.
         """
-        if self.concat_compression:
-            return self.fileobj.last_block_offset
-        else:
-            return self.last_block_offset
+        return self.last_block_offset
 
     def getnames(self):
         """Return the members of the archive as a list of their names. It has
@@ -2285,7 +2282,8 @@ class TarFile(object):
 
         tarinfo = copy.copy(tarinfo)
         if self.concat_compression:
-            self.fileobj.new_compression_block(set_last_block_offset=True)
+            self.fileobj.new_compression_block()
+            self.last_block_offset = self.fileobj.last_block_offset
         else:
             self.last_block_offset = self.fileobj.tell()
 
index 3eadfad..b2e1238 100644 (file)
@@ -142,6 +142,56 @@ class ConcatCompressTest(BaseTest):
         assert not os.path.exists("big")
         assert not os.path.exists("small2")
 
+    def test_concat_extract_one_fileobj_multivol(self):
+        '''
+        Create a tar file with multiple files inside and multiple volumes,
+        using concat compression mode, then decompress a file spanning two
+        volumes with the tarfile module using the fileobj parameter.
+        '''
+
+        # create the content of the file to compress and hash it
+        hash = dict()
+        hash["small"] = self.create_file("small", 100000)
+        hash["big"] = self.create_file("big", 1200000)
+
+        # create the tar file with volumes
+        tarobj = TarFile.open("sample.tar.gz",
+                              mode="w#gz",
+                              concat_compression=True,
+                              max_volume_size=1000000,
+                              new_volume_handler=new_volume_handler)
+        tarobj.add("small")
+        tarobj.add("big")
+        pos = tarobj.get_last_member_offset()
+        tarobj.close()
+
+        assert os.path.exists("sample.tar.gz")
+
+        os.unlink("big")
+        os.unlink("small")
+
+        def new_volume_handler_fo(tarobj, base_name, volume_number):
+            '''
+            Handles the new volumes, ignoring base_name as it'll be None because
+            we'll be using a seekable fileobj.
+            '''
+            volume_path = "sample.tar.gz.%d" % volume_number
+            tarobj.open_volume(volume_path)
+
+        # extract only the "big" file
+        fo = open("sample.tar.gz", 'r')
+        fo.seek(pos)
+        tarobj = TarFile.open(mode="r#gz", fileobj=fo,
+                              concat_compression=True,
+                              new_volume_handler=new_volume_handler_fo)
+        tarobj.extract(tarobj.next())
+        tarobj.close()
+        assert os.path.exists("big")
+        assert hash['big'] == self.md5sum("big")
+
+        # we didn't extract the other files
+        assert not os.path.exists("small")
+
     def test_multiple_files_zcat_extract(self):
         '''
         Create a tar file with only multiple files inside, using concat
index dc132f4..3dbf9a2 100644 (file)
 import os
 import shutil
 import logging
+import binascii
+import json
+from datetime import datetime
+from functools import partial
 
+from deltatar.tarfile import TarFile, GNU_FORMAT
 from deltatar.deltatar import DeltaTar
 
 import filesplit
@@ -39,7 +44,7 @@ class DeltaTarTest(BaseTest):
         '''
         Create base test data
         '''
-        os.system('rm -rf source_dir backup_dir')
+        os.system('rm -rf source_dir source_dir2 backup_dir huge')
         os.makedirs('source_dir/test/test2')
         self.hash = dict()
         self.hash["source_dir/test/test2"] = ''
@@ -55,9 +60,9 @@ class DeltaTarTest(BaseTest):
         '''
         Remove temporal files created by unit tests
         '''
-        os.system("rm -rf source_dir backup_dir")
+        os.system("rm -rf source_dir source_dir2 backup_dir huge")
 
-    def test_create_simple_full_backup(self):
+    def test_restore_simple_full_backup(self):
         '''
         Creates a full backup without any filtering and restores it.
         '''
@@ -87,8 +92,6 @@ class DeltaTarTest(BaseTest):
         '''
         Creates a full backup and checks the index' checksum of files
         '''
-        import binascii
-        import json
         deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD,
                             logger=self.consoleLogger)
 
@@ -121,16 +124,24 @@ class DeltaTarTest(BaseTest):
             elif began_list:
                 crc = binascii.crc32(l, crc) & 0xffffffff
 
-    def test_create_multivol(self):
+
+    def test_restore_multivol(self):
         '''
-        Creates a full backup without any filtering with multiple volumes.
+        Creates a full backup without any filtering with multiple volumes and
+        restores it.
         '''
         deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD,
                             logger=self.consoleLogger)
 
+
+        self.hash = dict()
+        os.makedirs('source_dir2')
+        self.hash["source_dir2/big"]  = self.create_file("source_dir2/big", 100000)
+        self.hash["source_dir2/huge"]  = self.create_file("source_dir2/huge", 1200000)
+
         # create first backup
         deltatar.create_full_backup(
-            source_path="source_dir",
+            source_path="source_dir2",
             backup_path="backup_dir",
             max_volume_size=1)
 
@@ -139,16 +150,14 @@ class DeltaTarTest(BaseTest):
             deltatar.volume_name_func("backup_dir", True, 0)))
         assert os.path.exists(os.path.join("backup_dir",
             deltatar.volume_name_func("backup_dir", True, 1)))
-        assert os.path.exists(os.path.join("backup_dir",
-            deltatar.volume_name_func("backup_dir", True, 2)))
 
-        shutil.rmtree("source_dir")
+        shutil.rmtree("source_dir2")
 
         tar_filename = deltatar.volume_name_func('backup_dir', True, 0)
         tar_path = os.path.join("backup_dir", tar_filename)
 
         # this should automatically restore all volumes
-        deltatar.restore_backup(target_path="source_dir",
+        deltatar.restore_backup(target_path="source_dir2",
                                 backup_tar_path=tar_path)
 
         for key, value in self.hash.iteritems():
@@ -156,6 +165,69 @@ class DeltaTarTest(BaseTest):
             if value:
                 assert value == self.md5sum(key)
 
+    def test_restore_multivol_manual_from_index(self):
+        '''
+        Creates a full backup with multiple volumes, then manually restores
+        one file by seeking directly to its offset taken from the index file.
+        '''
+        # this test only works for uncompressed or concat compressed modes
+        if self.MODE.startswith(':') or self.MODE.startswith('|'):
+            return
+
+        deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD,
+                            logger=self.consoleLogger)
+
+
+        self.hash = dict()
+        os.makedirs('source_dir2')
+        self.hash["source_dir2/big"]  = self.create_file("source_dir2/big", 100000)
+        self.hash["source_dir2/huge"]  = self.create_file("source_dir2/huge", 1200000)
+
+        # create first backup
+        deltatar.create_full_backup(
+            source_path="source_dir2",
+            backup_path="backup_dir",
+            max_volume_size=1)
+
+        assert os.path.exists("backup_dir")
+        assert os.path.exists(os.path.join("backup_dir",
+            deltatar.volume_name_func("backup_dir", True, 0)))
+        assert os.path.exists(os.path.join("backup_dir",
+            deltatar.volume_name_func("backup_dir", True, 1)))
+
+        shutil.rmtree("source_dir2")
+
+        tar_filename = deltatar.volume_name_func('backup_dir', True, 0)
+        tar_path = os.path.join("backup_dir", tar_filename)
+
+        index_filename = deltatar.index_name_func(True)
+        index_path = os.path.join("backup_dir", index_filename)
+
+        # this should automatically restore the huge file
+        f = open(index_path, 'r')
+        f.readline()  # skip the index header line
+        for data in (json.loads(l) for l in f):
+            if data.get('type', '') == 'file' and data['path'] == "./huge":
+                offset = data['offset']
+                break
+
+        fo = open(tar_path, 'r')
+        fo.seek(offset)
+        def new_volume_handler(mode, tarobj, base_name, volume_number):
+            tarobj.open_volume(datetime.now().strftime(
+                "backup_dir/bfull-%y-%m-%d-%H%M-002.tar") +\
+                DeltaTar._DeltaTar__file_extensions_dict[mode])
+        new_volume_handler = partial(new_volume_handler, self.MODE)
+
+        tarobj = TarFile.open(mode="r" + self.MODE, fileobj=fo,
+                              concat_compression=True,
+                              new_volume_handler=new_volume_handler,
+                              password=self.PASSWORD)
+        tarobj.extract(tarobj.next())
+        tarobj.close()
+        assert self.hash['source_dir2/huge'] == self.md5sum('huge')
+
+        os.unlink("huge")
 
     def test_restore_from_index(self):
         '''
@@ -187,6 +259,36 @@ class DeltaTarTest(BaseTest):
             if value:
                 assert value == self.md5sum(key)
 
+    def test_restore_multivol_from_index(self):
+        '''
+        Restores a full multivolume backup using an index file.
+        '''
+        # this test only works for uncompressed or concat compressed modes
+        if self.MODE.startswith(':') or self.MODE.startswith('|'):
+            return
+
+        deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD,
+                            logger=self.consoleLogger)
+
+        # create first backup
+        deltatar.create_full_backup(
+            source_path="source_dir",
+            backup_path="backup_dir",
+            max_volume_size=1)
+
+        shutil.rmtree("source_dir")
+
+        # this should automatically restore all volumes
+        index_filename = deltatar.index_name_func(True)
+        index_path = os.path.join("backup_dir", index_filename)
+
+        deltatar.restore_backup(target_path="source_dir",
+            backup_indexes_paths=[index_path])
+
+        for key, value in self.hash.iteritems():
+            assert os.path.exists(key)
+            if value:
+                assert value == self.md5sum(key)
 
 class DeltaTar2Test(DeltaTarTest):
     '''