Add initial support for restoring from an index; still failing for directories.
author Eduardo Robles Elvira <edulix@wadobo.com>
Tue, 30 Jul 2013 10:23:25 +0000 (12:23 +0200)
committer Eduardo Robles Elvira <edulix@wadobo.com>
Tue, 30 Jul 2013 10:23:25 +0000 (12:23 +0200)
deltatar/deltatar.py
deltatar/tarfile.py
testing/test_deltatar.py

index fa57ac0..a8f07d4 100644 (file)
@@ -308,7 +308,7 @@ class DeltaTar(object):
             raise Exception('Unrecognized extension')
 
         # some initialization
-        vol_no = 0
+        self.vol_no = 0
 
         # generate the first volume name
         vol_name = self.volume_name_func(backup_path, True, 0)
@@ -328,19 +328,18 @@ class DeltaTar(object):
             '''
             volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
             volume_path = os.path.join(backup_path, volume_name)
+            deltarobj.vol_no = volume_number
 
             # we convert relative paths into absolute because CWD is changed
             if not os.path.isabs(volume_path):
                 volume_path = os.path.join(cwd, volume_path)
-            try:
-                tarobj.open_volume(volume_path)
-            except Exception, e:
-                import ipdb; ipdb.set_trace()
+
+            tarobj.open_volume(volume_path)
 
         # wraps some args from context into the handler
         new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
 
-        index_fd.write('{"type": "python-delta-tar-index", version: "1" }\n')
+        index_fd.write('{"type": "python-delta-tar-index", "version": 1 }\n')
 
         s = '{"type": "BEGIN-FILE-LIST"}\n'
         # calculate checksum and write into the stream
@@ -359,17 +358,17 @@ class DeltaTar(object):
 
         os.chdir(source_path)
         for path in self._recursive_walk_dir('.'):
+            tarobj.add(path)
+
             # TODO: reduce paths length using previous dir entries
             stat = self._stat_dict(path)
-            stat['volume'] = vol_no
-            stat['offset'] = tarobj.fileobj.tell() # TODO: check/fix this
+            stat['volume'] = self.vol_no
+            stat['offset'] = tarobj.get_last_member_offset()
 
             s = json.dumps(stat) + '\n'
             crc = binascii.crc32(s, crc) & 0xffffffff
             index_fd.write(s)
 
-            tarobj.add(path)
-
         s = '{"type": "END-FILE-LIST"}\n'
         crc = binascii.crc32(s, crc) & 0xffffffff
         index_fd.write(s)
@@ -412,25 +411,53 @@ class DeltaTar(object):
           to backup_indexes_paths to restore directly from a tar file without
           using any file index. If it's a multivol tarfile, volume_name_func
           will be called.
+
+        Note: If you want to use an index to restore a backup, this function
+        only supports to do so when the tarfile mode is either uncompressed or
+        uses concat compress mode, because otherwise it would be very slow.
         '''
+        # check/sanitize input
         if not isinstance(target_path, basestring):
             raise Exception('Target path must be a string')
 
-        if not isinstance(backup_tar_path, basestring):
-            raise Exception('Backup tar path must be a string')
+        if backup_indexes_paths is None and backup_tar_path == []:
+            raise Exception("You have to either provide index paths or a tar path")
 
-        if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
-            raise Exception('Source path "%s" does not exist or is not a '\
-                            'directory' % backup_tar_path)
+        tar_mode = (backup_indexes_paths == [])
+        if tar_mode:
+            if not isinstance(backup_tar_path, basestring):
+                raise Exception('Backup tar path must be a string')
+
+            if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
+                raise Exception('Source path "%s" does not exist or is not a '\
+                                'file' % backup_tar_path)
+
+            if not os.access(backup_tar_path, os.R_OK):
+                raise Exception('Source path "%s" is not readable' % backup_tar_path)
+        else:
+            if not isinstance(backup_indexes_paths, list):
+                raise Exception('backup_indexes_paths must be a list')
+
+            if self.mode.startswith(':') or self.mode.startswith('|'):
+                raise Exception('Restore only supports either uncompressed tars'
+                    ' or concat compression when restoring from an index, and '
+                    ' the open mode you provided is "%s"' % self.mode)
+
+            for index in backup_indexes_paths:
+                if not isinstance(index, basestring):
+                    raise Exception('indices must be strings')
 
-        if not os.access(backup_tar_path, os.R_OK):
-            raise Exception('Source path "%s" is not readable' % backup_tar_path)
+                if not os.path.exists(index) or not os.path.isfile(index):
+                    raise Exception('Index path "%s" does not exist or is not a '\
+                                    'file' % index)
+
+                if not os.access(index, os.R_OK):
+                    raise Exception('Index path "%s" is not readable' % index)
 
         # try to create backup path if needed
         if not os.path.exists(target_path):
             os.makedirs(target_path)
 
-        backup_path = os.path.dirname(backup_tar_path)
         cwd = os.getcwd()
         def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
             '''
@@ -445,15 +472,120 @@ class DeltaTar(object):
             tarobj.open_volume(volume_path)
 
         # wraps some args from context into the handler
-        new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
 
-        tarobj = tarfile.TarFile.open(backup_tar_path,
-                              mode='r' + self.mode,
-                              format=tarfile.GNU_FORMAT,
-                              concat_compression='#gz' in self.mode,
-                              password=self.password,
-                              new_volume_handler=new_volume_handler)
-        os.chdir(target_path)
-        tarobj.extractall()
-        os.chdir(cwd)
-        tarobj.close()
+        if tar_mode:
+            backup_path = os.path.dirname(backup_tar_path)
+            new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
+            tarobj = tarfile.TarFile.open(backup_tar_path,
+                                mode='r' + self.mode,
+                                format=tarfile.GNU_FORMAT,
+                                concat_compression='#gz' in self.mode,
+                                password=self.password,
+                                new_volume_handler=new_volume_handler)
+            os.chdir(target_path)
+            tarobj.extractall()
+            os.chdir(cwd)
+            tarobj.close()
+        else:
+            # for now, we only consider one index
+            backup_index_path = backup_indexes_paths[0]
+            os.chdir(target_path)
+
+            # make path absolute
+            if not os.path.isabs(backup_index_path):
+                backup_index_path = os.path.join(cwd, backup_index_path)
+
+            # setup some vars
+            backup_path = os.path.dirname(backup_index_path)
+            new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
+
+            # open
+            f = open(backup_index_path, 'r')
+
+            # check index header
+            j, l_no = self._parse_json_line(f, -1)
+            if j.get("type", '') != 'python-delta-tar-index' or\
+                    j.get('version', -1) != 1:
+                raise Exception("invalid index file format: %s" % json.dumps(j))
+
+            # find BEGIN-FILE-LIST, ignore other headers
+            while True:
+                j, l_no = self._parse_json_line(f, -1)
+                if j.get('type', '') == 'BEGIN-FILE-LIST':
+                    break
+
+
+            # current volume number
+            curr_vol_no = None
+            # current volume file
+            vol_fd = None
+            offset = -1
+            tarobj = None
+
+            # read each file in the index and process it to do the restore
+            while True:
+                j, l_no = self._parse_json_line(f, -1)
+                op_type = j.get('type', '')
+
+                # when we detect the end of the list, break the loop
+                if op_type == 'END-FILE-LIST':
+                    break
+
+                # check input
+                if op_type not in ['directory', 'file', 'link']:
+                    self.logger.warn('unrecognized type to be '
+                                     'restored: %s, line %d' % (op_type, l_no))
+                    continue
+
+                # TODO: filter by j.get('path', '')
+
+                vol_no = j.get('volume', -1)
+                if not isinstance(vol_no, int) or vol_no < 0:
+                    self.logger.warn('unrecognized type to be '
+                                     'restored: %s, line %d' % (op_type, l_no))
+
+                # setup the volume that needs to be read
+                if curr_vol_no != vol_no:
+                    vol_name = self.volume_name_func(backup_path, True, vol_no)
+                    vol_path = os.path.join(backup_path, vol_name)
+                    if vol_fd:
+                        vol_fd.close()
+                    vol_fd = open(vol_path, 'r')
+
+                    # force reopen of the tarobj because of new volume
+                    if tarobj:
+                        tarobj.close()
+                        tarobj = None
+
+                # seek tarfile if needed
+                offset = j.get('volume', -1)
+                if vol_fd.tell() != offset:
+                    vol_fd.seek(offset)
+
+                # open tarfile if needed
+                if not tarobj:
+                    tarobj = tarfile.open(mode="r" + self.mode, fileobj=vol_fd,
+                                format=tarfile.GNU_FORMAT,
+                                concat_compression='#gz' in self.mode,
+                                password=self.password,
+                                new_volume_handler=new_volume_handler)
+
+                # finally, restore the file
+                member = tarobj.next()
+                tarobj.extract(member)
+
+            if tarobj:
+                tarobj.close()
+
+    def _parse_json_line(self, f, l_no):
+        '''
+        Read a line from a file and parse it as JSON, raising an exception
+        that includes the offending line on error.
+        '''
+        l = f.readline()
+        l_no += 1
+        try:
+            j = json.loads(l)
+        except ValueError, e:
+            raise Exception("error parsing this json line "
+                "(line number %d): %s" % (l_no, l))
+        return j, l_no
index a57a6dc..b312a67 100644 (file)
@@ -437,6 +437,7 @@ class _Stream:
         self.enctype  = enctype
         self.key_length = key_length
         self.password = password
+        self.last_block_offset = 0L
 
         if comptype == "gz":
             try:
@@ -2286,6 +2287,8 @@ class TarFile(object):
         tarinfo = copy.copy(tarinfo)
         if self.concat_compression:
             self.fileobj.new_compression_block(set_last_block_offset=True)
+        else:
+            self.last_block_offset = self.fileobj.tell()
 
         buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
         self.fileobj.write(buf)
index c5ba719..dd31752 100644 (file)
@@ -157,6 +157,36 @@ class DeltaTarTest(BaseTest):
                 assert value == self.md5sum(key)
 
 
+    def test_restore_from_index(self):
+        '''
+        Restores a full backup using an index file.
+        '''
+        # this test only works for uncompressed or concat compressed modes
+        if self.MODE.startswith(':') or self.MODE.startswith('|'):
+            return
+
+        deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD,
+                            logger=self.consoleLogger)
+
+        # create first backup
+        deltatar.create_full_backup(
+            source_path="source_dir",
+            backup_path="backup_dir",
+            max_volume_size=1)
+
+        shutil.rmtree("source_dir")
+
+        # this should automatically restore all volumes
+        index_filename = deltatar.index_name_func(True)
+        index_path = os.path.join("backup_dir", index_filename)
+
+        deltatar.restore_backup(target_path="source_dir",
+            backup_indexes_paths=[index_path])
+
+        for key, value in self.hash.iteritems():
+            assert os.path.exists(key)
+            if value:
+                assert value == self.md5sum(key)
 
 
 class DeltaTar2Test(DeltaTarTest):