From 11684b1d3d15bb2a8c120abb94d00ac6084e7dcc Mon Sep 17 00:00:00 2001
From: Eduardo Robles Elvira
Date: Tue, 30 Jul 2013 12:23:25 +0200
Subject: [PATCH] adding initial support to restore from index. still failing
 in directories

---
 deltatar/deltatar.py     |  190 +++++++++++++++++++++++++++++++++++++++-------
 deltatar/tarfile.py      |    3 +
 testing/test_deltatar.py |   30 +++++++
 3 files changed, 194 insertions(+), 29 deletions(-)

diff --git a/deltatar/deltatar.py b/deltatar/deltatar.py
index fa57ac0..a8f07d4 100644
--- a/deltatar/deltatar.py
+++ b/deltatar/deltatar.py
@@ -308,7 +308,7 @@ class DeltaTar(object):
             raise Exception('Unrecognized extension')
 
         # some initialization
-        vol_no = 0
+        self.vol_no = 0
 
         # generate the first volume name
         vol_name = self.volume_name_func(backup_path, True, 0)
@@ -328,19 +328,18 @@ class DeltaTar(object):
             '''
             volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
             volume_path = os.path.join(backup_path, volume_name)
+            deltarobj.vol_no = volume_number
 
             # we convert relative paths into absolute because CWD is changed
             if not os.path.isabs(volume_path):
                 volume_path = os.path.join(cwd, volume_path)
-            try:
-                tarobj.open_volume(volume_path)
-            except Exception, e:
-                import ipdb; ipdb.set_trace()
+
+            tarobj.open_volume(volume_path)
 
         # wraps some args from context into the handler
         new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
 
-        index_fd.write('{"type": "python-delta-tar-index", version: "1" }\n')
+        index_fd.write('{"type": "python-delta-tar-index", "version": 1 }\n')
 
         s = '{"type": "BEGIN-FILE-LIST"}\n'
         # calculate checksum and write into the stream
@@ -359,17 +358,17 @@ class DeltaTar(object):
         os.chdir(source_path)
 
         for path in self._recursive_walk_dir('.'):
+            tarobj.add(path)
+
             # TODO: reduce paths length using previous dir entries
             stat = self._stat_dict(path)
-            stat['volume'] = vol_no
-            stat['offset'] = tarobj.fileobj.tell() # TODO: check/fix this
+            stat['volume'] = self.vol_no
+            stat['offset'] = tarobj.get_last_member_offset()
 
             s = json.dumps(stat) + '\n'
             crc = binascii.crc32(s, crc) & 0xffffffff
             index_fd.write(s)
 
-            tarobj.add(path)
-
         s = '{"type": "END-FILE-LIST"}\n'
         crc = binascii.crc32(s, crc) & 0xffffffff
         index_fd.write(s)
@@ -412,25 +411,53 @@
         to backup_indexes_paths to restore directly from a tar file without
         using any file index. If it's a multivol tarfile, volume_name_func
         will be called.
+
+        Note: If you want to use an index to restore a backup, this function
+        only supports doing so when the tarfile mode is either uncompressed
+        or uses concat compression, because otherwise it would be very slow.
         '''
+        # check/sanitize input
         if not isinstance(target_path, basestring):
             raise Exception('Target path must be a string')
 
-        if not isinstance(backup_tar_path, basestring):
-            raise Exception('Backup tar path must be a string')
+        if backup_indexes_paths is None and backup_tar_path == []:
+            raise Exception("You have to either provide index paths or a tar path")
 
-        if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
-            raise Exception('Source path "%s" does not exist or is not a '\
-                            'directory' % backup_tar_path)
+        tar_mode = (backup_indexes_paths == [])
+        if tar_mode:
+            if not isinstance(backup_tar_path, basestring):
+                raise Exception('Backup tar path must be a string')
+
+            if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
+                raise Exception('Source path "%s" does not exist or is not a '\
+                                'file' % backup_tar_path)
+
+            if not os.access(backup_tar_path, os.R_OK):
+                raise Exception('Source path "%s" is not readable' % backup_tar_path)
+        else:
+            if not isinstance(backup_indexes_paths, list):
+                raise Exception('backup_indexes_paths must be a list')
+
+            if self.mode.startswith(':') or self.mode.startswith('|'):
+                raise Exception('Restore only supports either uncompressed tars'
+                    ' or concat compression when restoring from an index, and '
+                    'the open mode you provided is "%s"' % self.mode)
+
+            for index in backup_indexes_paths:
+                if not isinstance(index, basestring):
+                    raise Exception('indices must be strings')
 
-        if not os.access(backup_tar_path, os.R_OK):
-            raise Exception('Source path "%s" is not readable' % backup_tar_path)
+                if not os.path.exists(index) or not os.path.isfile(index):
+                    raise Exception('Index path "%s" does not exist or is not a '\
+                                    'file' % index)
+
+                if not os.access(index, os.R_OK):
+                    raise Exception('Index path "%s" is not readable' % index)
 
         # try to create backup path if needed
         if not os.path.exists(target_path):
            os.makedirs(target_path)
 
-        backup_path = os.path.dirname(backup_tar_path)
         cwd = os.getcwd()
         def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
             '''
@@ -445,15 +472,120 @@ class DeltaTar(object):
             tarobj.open_volume(volume_path)
 
         # wraps some args from context into the handler
-        new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
-        tarobj = tarfile.TarFile.open(backup_tar_path,
-                        mode='r' + self.mode,
-                        format=tarfile.GNU_FORMAT,
-                        concat_compression='#gz' in self.mode,
-                        password=self.password,
-                        new_volume_handler=new_volume_handler)
-        os.chdir(target_path)
-        tarobj.extractall()
-        os.chdir(cwd)
-        tarobj.close()
+        if tar_mode:
+            backup_path = os.path.dirname(backup_tar_path)
+            new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
+            tarobj = tarfile.TarFile.open(backup_tar_path,
+                            mode='r' + self.mode,
+                            format=tarfile.GNU_FORMAT,
+                            concat_compression='#gz' in self.mode,
+                            password=self.password,
+                            new_volume_handler=new_volume_handler)
+            os.chdir(target_path)
+            tarobj.extractall()
+            os.chdir(cwd)
+            tarobj.close()
+        else:
+            # for now, we only consider one index
+            backup_index_path = backup_indexes_paths[0]
+            os.chdir(target_path)
+
+            # make path absolute
+            if not os.path.isabs(backup_index_path):
+                backup_index_path = os.path.join(cwd, backup_index_path)
+
+            # setup some vars
+            backup_path = os.path.dirname(backup_index_path)
+            new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
+
+            # open
+            f = open(backup_index_path, 'r')
+
+            # check index header
+            j, l_no = self._parse_json_line(f, -1)
+            if j.get("type", '') != 'python-delta-tar-index' or\
+               j.get('version', -1) != 1:
+                raise Exception("invalid index file format: %s" % json.dumps(j))
+
+            # find BEGIN-FILE-LIST, ignore other headers
+            while True:
+                j, l_no = self._parse_json_line(f, -1)
+                if j.get('type', '') == 'BEGIN-FILE-LIST':
+                    break
+
+
+            # current volume number
+            curr_vol_no = None
+            # current volume file
+            vol_fd = None
+            offset = -1
+            tarobj = None
+
+            # read each file in the index and process it to do the restore
+            while True:
+                j, l_no = self._parse_json_line(f, -1)
+                op_type = j.get('type', '')
+
+                # when we detect the end of the list, break the loop
+                if op_type == 'END-FILE-LIST':
+                    break
+
+                # check input
+                if op_type not in ['directory', 'file', 'link']:
+                    self.logger.warn('unrecognized type to be '
+                        'restored: %s, line %d' % (op_type, l_no))
+                    continue
+
+                # TODO: filter by j.get('path', '')
+
+                vol_no = j.get('volume', -1)
+                if not isinstance(vol_no, int) or vol_no < 0:
+                    self.logger.warn('invalid volume number for entry to be '
+                        'restored: %s, line %d' % (vol_no, l_no))
+
+                # setup the volume that needs to be read
+                if curr_vol_no != vol_no:
+                    vol_name = self.volume_name_func(backup_path, True, vol_no)
+                    vol_path = os.path.join(backup_path, vol_name)
+                    if vol_fd:
+                        vol_fd.close()
+                    vol_fd = open(vol_path, 'r')
+
+                    # force reopen of the tarobj because of new volume
+                    if tarobj:
+                        tarobj.close()
+                        tarobj = None
+
+                # seek tarfile if needed
+                offset = j.get('offset', -1)
+                if vol_fd.tell() != offset:
+                    vol_fd.seek(offset)
+
+                # open tarfile if needed
+                if not tarobj:
+                    tarobj = tarfile.open(mode="r" + self.mode, fileobj=vol_fd,
+                                format=tarfile.GNU_FORMAT,
+                                concat_compression='#gz' in self.mode,
+                                password=self.password,
+                                new_volume_handler=new_volume_handler)
+
+                # finally, restore the file
+                member = tarobj.next()
+                tarobj.extract(member)
+
+            if tarobj:
+                tarobj.close()
+
+    def _parse_json_line(self, f, l_no):
+        '''
+        Read a line from the file, parse it as JSON and raise a readable error on failure.
+        '''
+        l = f.readline()
+        l_no += 1
+        try:
+            j = json.loads(l)
+        except ValueError, e:
+            raise Exception("error parsing this json line "
+                "(line number %d): %s" % (l_no, l))
+        return j, l_no
 
diff --git a/deltatar/tarfile.py b/deltatar/tarfile.py
index a57a6dc..b312a67 100644
--- a/deltatar/tarfile.py
+++ b/deltatar/tarfile.py
@@ -437,6 +437,7 @@ class _Stream:
         self.enctype = enctype
         self.key_length = key_length
         self.password = password
+        self.last_block_offset = 0L
 
         if comptype == "gz":
             try:
@@ -2286,6 +2287,8 @@ class TarFile(object):
         tarinfo = copy.copy(tarinfo)
         if self.concat_compression:
             self.fileobj.new_compression_block(set_last_block_offset=True)
+        else:
+            self.last_block_offset = self.fileobj.tell()
 
         buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
         self.fileobj.write(buf)
diff --git a/testing/test_deltatar.py b/testing/test_deltatar.py
index c5ba719..dd31752 100644
--- a/testing/test_deltatar.py
+++ b/testing/test_deltatar.py
@@ -157,6 +157,36 @@ class DeltaTarTest(BaseTest):
                 assert value == self.md5sum(key)
 
 
+    def test_restore_from_index(self):
+        '''
+        Restores a full backup using an index file.
+        '''
+        # this test only works for uncompressed or concat compressed modes
+        if self.MODE.startswith(':') or self.MODE.startswith('|'):
+            return
+
+        deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD,
+                            logger=self.consoleLogger)
+
+        # create first backup
+        deltatar.create_full_backup(
+            source_path="source_dir",
+            backup_path="backup_dir",
+            max_volume_size=1)
+
+        shutil.rmtree("source_dir")
+
+        # this should automatically restore all volumes
+        index_filename = deltatar.index_name_func(True)
+        index_path = os.path.join("backup_dir", index_filename)
+
+        deltatar.restore_backup(target_path="source_dir",
+            backup_indexes_paths=[index_path])
+
+        for key, value in self.hash.iteritems():
+            assert os.path.exists(key)
+            if value:
+                assert value == self.md5sum(key)
 
 
 class DeltaTar2Test(DeltaTarTest):
-- 
1.7.1
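
A note on the index format used above (not part of the patch itself): the index
written by create_full_backup is line-oriented JSON. It starts with a header
object ({"type": "python-delta-tar-index", "version": 1}), then a
BEGIN-FILE-LIST marker, then one object per archived path carrying at least
"type", "volume" (the volume number) and "offset" (where the member starts
inside that volume), and it ends with an END-FILE-LIST marker. The following
is a minimal reader sketch for that layout; it only relies on fields visible
in the diff, and the helper name iter_index_entries and its error handling are
illustrative, not deltatar API:

    import json

    def iter_index_entries(index_path):
        '''
        Illustrative helper: yield (volume, offset, entry) tuples from a
        python-delta-tar index file. deltatar itself does this inside
        restore_backup()/_parse_json_line() as shown in the patch.
        '''
        with open(index_path, 'r') as f:
            header = json.loads(f.readline())
            if header.get('type') != 'python-delta-tar-index' \
                    or header.get('version') != 1:
                raise ValueError('not a python-delta-tar index: %r' % header)

            # skip any additional header lines until the file list starts
            while json.loads(f.readline()).get('type') != 'BEGIN-FILE-LIST':
                pass

            while True:
                entry = json.loads(f.readline())
                if entry.get('type') == 'END-FILE-LIST':
                    break
                # each entry records which volume holds the member and the
                # byte offset at which the member starts inside that volume
                yield entry.get('volume'), entry.get('offset'), entry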
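
Restoring from such an index then boils down to opening the right volume file,
seeking to the recorded offset and extracting a single member, which is what
the new else branch of restore_backup does through the bundled deltatar.tarfile
(the concat_compression, password and new_volume_handler arguments only exist
there). Assuming a plain uncompressed volume, the same idea can be sketched
with the standard library tarfile module alone; extract_one below is only an
illustration under that assumption, not deltatar API:

    import tarfile

    def extract_one(vol_path, offset, target_dir):
        '''
        Illustrative helper: extract the single member that starts at `offset`
        inside the uncompressed tar volume `vol_path` into `target_dir`.
        '''
        vol_fd = open(vol_path, 'rb')
        try:
            # jump straight to the member header recorded in the index
            vol_fd.seek(offset)
            # 'r:' forces plain, uncompressed tar; concat-compressed volumes
            # need the patched deltatar.tarfile instead
            tarobj = tarfile.open(mode='r:', fileobj=vol_fd)
            member = tarobj.next()
            tarobj.extract(member, path=target_dir)
            tarobj.close()
        finally:
            vol_fd.close()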