raise Exception('Unrecognized extension')
# some initialization
- vol_no = 0
+ self.vol_no = 0
# generate the first volume name
vol_name = self.volume_name_func(backup_path, True, 0)
'''
volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
volume_path = os.path.join(backup_path, volume_name)
+ deltarobj.vol_no = volume_number
# we convert relative paths into absolute because CWD is changed
if not os.path.isabs(volume_path):
volume_path = os.path.join(cwd, volume_path)
- try:
- tarobj.open_volume(volume_path)
- except Exception, e:
- import ipdb; ipdb.set_trace()
+
+ tarobj.open_volume(volume_path)
# wraps some args from context into the handler
new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
- index_fd.write('{"type": "python-delta-tar-index", version: "1" }\n')
+ index_fd.write('{"type": "python-delta-tar-index", "version": 1 }\n')
s = '{"type": "BEGIN-FILE-LIST"}\n'
# calculate checksum and write into the stream
os.chdir(source_path)
for path in self._recursive_walk_dir('.'):
+ tarobj.add(path)
+
# TODO: reduce paths length using previous dir entries
stat = self._stat_dict(path)
- stat['volume'] = vol_no
- stat['offset'] = tarobj.fileobj.tell() # TODO: check/fix this
+ stat['volume'] = self.vol_no
+ stat['offset'] = tarobj.get_last_member_offset()
s = json.dumps(stat) + '\n'
crc = binascii.crc32(s, crc) & 0xffffffff
index_fd.write(s)
- tarobj.add(path)
-
s = '{"type": "END-FILE-LIST"}\n'
crc = binascii.crc32(s, crc) & 0xffffffff
index_fd.write(s)
to backup_indexes_paths to restore directly from a tar file without
using any file index. If it's a multivol tarfile, volume_name_func
will be called.
+
+ Note: restoring a backup from an index is only supported when the
+ tarfile mode is either uncompressed or uses concat compression,
+ because otherwise it would be very slow.
'''
+ # check/sanitize input
if not isinstance(target_path, basestring):
raise Exception('Target path must be a string')
- if not isinstance(backup_tar_path, basestring):
- raise Exception('Backup tar path must be a string')
+ if backup_indexes_paths is None and backup_tar_path == []:
+ raise Exception("You have to either provide index paths or a tar path")
- if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
- raise Exception('Source path "%s" does not exist or is not a '\
- 'directory' % backup_tar_path)
+ tar_mode = (backup_indexes_paths == [])
+ if tar_mode:
+ if not isinstance(backup_tar_path, basestring):
+ raise Exception('Backup tar path must be a string')
+
+ if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
+ raise Exception('Source path "%s" does not exist or is not a '\
+ 'file' % backup_tar_path)
+
+ if not os.access(backup_tar_path, os.R_OK):
+ raise Exception('Source path "%s" is not readable' % backup_tar_path)
+ else:
+ if not isinstance(backup_indexes_paths, list):
+ raise Exception('backup_indexes_paths must be a list')
+
+ if self.mode.startswith(':') or self.mode.startswith('|'):
+ raise Exception('Restore only supports either uncompressed tars'
+ ' or concat compression when restoring from an index, and '
+ ' the open mode you provided is "%s"' % self.mode)
+
+ for index in backup_indexes_paths:
+ if not isinstance(index, basestring):
+ raise Exception('indices must be strings')
- if not os.access(backup_tar_path, os.R_OK):
- raise Exception('Source path "%s" is not readable' % backup_tar_path)
+ if not os.path.exists(index) or not os.path.isfile(index):
+ raise Exception('Index path "%s" does not exist or is not a '\
+ 'file' % index)
+
+ if not os.access(index, os.R_OK):
+ raise Exception('Index path "%s" is not readable' % index)
# try to create backup path if needed
if not os.path.exists(target_path):
os.makedirs(target_path)
- backup_path = os.path.dirname(backup_tar_path)
cwd = os.getcwd()
def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
'''
tarobj.open_volume(volume_path)
# wraps some args from context into the handler
- new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
- tarobj = tarfile.TarFile.open(backup_tar_path,
- mode='r' + self.mode,
- format=tarfile.GNU_FORMAT,
- concat_compression='#gz' in self.mode,
- password=self.password,
- new_volume_handler=new_volume_handler)
- os.chdir(target_path)
- tarobj.extractall()
- os.chdir(cwd)
- tarobj.close()
+ if tar_mode:
+ backup_path = os.path.dirname(backup_tar_path)
+ new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
+ tarobj = tarfile.TarFile.open(backup_tar_path,
+ mode='r' + self.mode,
+ format=tarfile.GNU_FORMAT,
+ concat_compression='#gz' in self.mode,
+ password=self.password,
+ new_volume_handler=new_volume_handler)
+ os.chdir(target_path)
+ tarobj.extractall()
+ os.chdir(cwd)
+ tarobj.close()
+ else:
+ # for now, we only consider one index
+ backup_index_path = backup_indexes_paths[0]
+ os.chdir(target_path)
+
+ # make path absolute
+ if not os.path.isabs(backup_index_path):
+ backup_index_path = os.path.join(cwd, backup_index_path)
+
+ # setup some vars
+ backup_path = os.path.dirname(backup_index_path)
+ new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
+
+ # open
+ f = open(backup_index_path, 'r')
+
+ # check index header
+ j, l_no = self._parse_json_line(f, -1)
+ if j.get("type", '') != 'python-delta-tar-index' or\
+ j.get('version', -1) != 1:
+ raise Exception("invalid index file format: %s" % json.dumps(j))
+
+ # find BEGIN-FILE-LIST, ignore other headers
+ while True:
+ j, l_no = self._parse_json_line(f, -1)
+ if j.get('type', '') == 'BEGIN-FILE-LIST':
+ break
+
+
+ # current volume number
+ curr_vol_no = None
+ # current volume file
+ vol_fd = None
+ offset = -1
+ tarobj = None
+
+ # read each file in the index and process it to do the restore
+ while True:
+ j, l_no = self._parse_json_line(f, -1)
+ op_type = j.get('type', '')
+
+ # when we detect the end of the list, break the loop
+ if op_type == 'END-FILE-LIST':
+ break
+
+ # check input
+ if op_type not in ['directory', 'file', 'link']:
+ self.logger.warn('unrecognized type to be '
+ 'restored: %s, line %d' % (op_type, l_no))
+ continue
+
+ # TODO: filter by j.get('path', '')
+
+ vol_no = j.get('volume', -1)
+ if not isinstance(vol_no, int) or vol_no < 0:
+ self.logger.warn('unrecognized type to be '
+ 'restored: %s, line %d' % (op_type, l_no))
+
+ # setup the volume that needs to be read
+ if curr_vol_no != vol_no:
+ vol_name = self.volume_name_func(backup_path, True, vol_no)
+ vol_path = os.path.join(backup_path, vol_name)
+ if vol_fd:
+ vol_fd.close()
+ vol_fd = open(vol_path, 'r')
+
+ # force reopen of the tarobj because of new volume
+ if tarobj:
+ tarobj.close()
+ tarobj = None
+
+ # seek tarfile if needed
+ offset = j.get('volume', -1)
+ if vol_fd.tell() != offset:
+ vol_fd.seek(offset)
+
+ # open tarfile if needed
+ if not tarobj:
+ tarobj = tarfile.open(mode="r" + self.mode, fileobj=vol_fd,
+ format=tarfile.GNU_FORMAT,
+ concat_compression='#gz' in self.mode,
+ password=self.password,
+ new_volume_handler=new_volume_handler)
+
+ # finally, restore the file
+ member = tarobj.next()
+ tarobj.extract(member)
+
+ if tarobj:
+ tarobj.close()
+
+ def _parse_json_line(self, f, l_no):
+ '''
+ read from a file and parses a json line and prints it on screen on error
+ '''
+ l = f.readline()
+ l_no += 1
+ try:
+ j = json.loads(l)
+ except ValueError, e:
+ raise Exception("error parsing this json line "
+ "(line number %d): %s" % (l_no, l))
+ return j, l_no