return "%s-%s.index%s" % (prefix, date_str, extension)
- def volume_name_func(self, backup_path, is_full, volume_number):
+ def volume_name_func(self, backup_path, is_full, volume_number, guess_name=False):
'''
function that defines the name of tar volumes. It receives the
backup_path, if it's a full backup and the volume number, and must return
the name for the corresponding volume name. Optional, DeltaTar has default
names for tar volumes.
+
+ If guess_name is activated, the file is intended not to be created but
+ to be found, and thus the date will be guessed.
'''
prefix = "bfull" if is_full else "bdiff"
- date_str = self.current_time.strftime("%y-%m-%d-%H%M")
extension = self.__file_extensions_dict[self.mode]
- return "%s-%s-%03d.tar%s" % (prefix, date_str, volume_number + 1, extension)
+ if not guess_name:
+ date_str = self.current_time.strftime("%y-%m-%d-%H%M")
+ return "%s-%s-%03d.tar%s" % (prefix, date_str, volume_number + 1, extension)
+ else:
+ prefix = prefix + "-"
+ postfix = "-%03d.tar%s" % (volume_number + 1, extension)
+ try:
+ for f in os.listdir(backup_path):
+ if f.startswith(prefix) and f.endswith(postfix):
+ return f
+ except Exception, e:
+ import ipdb; ipdb.set_trace()
+ raise Exception("volume not found")
+
def filter_path(self, path, source_path="", is_dir=None):
'''
queue = [source_path]
while queue:
- cur_path = queue.pop()
+ cur_path = queue.pop(0)
# it might have been removed in the mean time
if not os.path.exists(cur_path):
self.vol_no = 0
# generate the first volume name
- vol_name = self.volume_name_func(backup_path, True, 0)
+ vol_name = self.volume_name_func(backup_path, is_full=False,
+ volume_number=0)
tarfile_path = os.path.join(backup_path, vol_name)
# init index
- index_name = self.index_name_func(True)
+ index_name = self.index_name_func(is_full=False)
index_path = os.path.join(backup_path, index_name)
index_fd = self.open_index(index_path, 'w')
'''
Handles the new volumes
'''
- volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
+ volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
+ volume_number=volume_number)
volume_path = os.path.join(backup_path, volume_name)
deltarobj.vol_no = volume_number
dir_it = self._recursive_walk_dir('.')
dir_path_it = self.jsonize_path_iterator(dir_it)
- index_it = sorted(index_it, key=lambda x: x[0]['path'])
- dir_path_it = sorted(dir_path_it, key=lambda x: x['path'])
+ def pr(path):
+ if not path:
+ return "None"
+ else:
+ return path["path"]
# for each file to be in the backup, do:
- for ipath, dpath, l_no in self.collate_iterators(iter(index_it), iter(dir_path_it)):
+ for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
action = None
# if file is not in the index, it means it's a new file, so we have
# to take a snapshot
+
if not ipath:
action = 'snapshot'
# if the file is not in the directory iterator, it means that it has
index_fd.write('{"type": "file-list-checksum", "checksum": %d}\n' %\
crc)
index_fd.close()
+ index_it.release()
os.chdir(cwd)
tarobj.close()
def iterate_index_path(self, index_path):
- # open
- f = self.open_index(index_path, 'r')
- # check index header
- j, l_no = self._parse_json_line(f, 0)
- if j.get("type", '') != 'python-delta-tar-index' or\
- j.get('version', -1) != 1:
- raise Exception("invalid index file format: %s" % json.dumps(j))
-
- # find BEGIN-FILE-LIST, ignore other headers
- while True:
- j, l_no = self._parse_json_line(f, l_no)
- if j.get('type', '') == 'BEGIN-FILE-LIST':
- break
+ '''
+ Returns an index iterator. Internally, it uses a classic iterator class.
+ We do that instead of just yielding so that the iterator object can have
+ an additional function to close the file descriptor that is opened in
+ the constructor.
+ '''
+ class IndexPathIterator(object):
+ def __init__(self, delta_tar, index_path):
+ self.delta_tar = delta_tar
+ self.index_path = index_path
+ self.f = None
+ self.__enter__()
- # current volume number
- curr_vol_no = None
- # current volume file
- vol_fd = None
- offset = -1
- tarobj = None
+ def __iter__(self):
+ return self
- # read each file in the index and process it to do the retore
- while True:
- try:
- j, l_no = self._parse_json_line(f, l_no)
- except Exception, e:
- f.close()
- raise e
+ def release(self):
+ if self.f:
+ self.f.close()
+
+ def __enter__(self):
+ '''
+ Allows this iterator to be used with the "with" statement
+ '''
+ if self.f is None:
+ self.f = self.delta_tar.open_index(self.index_path, 'r')
+ # check index header
+ j, l_no = self.delta_tar._parse_json_line(self.f, 0)
+ if j.get("type", '') != 'python-delta-tar-index' or\
+ j.get('version', -1) != 1:
+ raise Exception("invalid index file format: %s" % json.dumps(j))
+
+ # find BEGIN-FILE-LIST, ignore other headers
+ while True:
+ j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
+ if j.get('type', '') == 'BEGIN-FILE-LIST':
+ break
+ return self
+
+ def __exit__(self, type, value, tb):
+ '''
+ Allows this iterator to be used with the "with" statement
+ '''
+ self.f.close()
+ self.f = None
- op_type = j.get('type', '')
+ def next(self):
+ # read each file in the index and process it to do the retore
+ j = {}
+ l_no = -1
+ try:
+ j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
+ except Exception, e:
+ if self.f:
+ self.f.close()
+ raise e
- # when we detect the end of the list, break the loop
- if op_type == 'END-FILE-LIST':
- f.close()
- break
+ op_type = j.get('type', '')
- # check input
- if op_type not in ['directory', 'file', 'link']:
- self.logger.warn('unrecognized type to be '
- 'restored: %s, line %d' % (op_type, l_no))
- continue
+ # when we detect the end of the list, break the loop
+ if op_type == 'END-FILE-LIST':
+ if self.f:
+ self.f.close()
+ raise StopIteration
+
+ # check input
+ if op_type not in ['directory', 'file', 'link']:
+ self.delta_tar.logger.warn('unrecognized type to be '
+ 'restored: %s, line %d' % (op_type, l_no))
+ # iterate again
+ return self.next()
+
+ return j, l_no
- yield j, l_no
+ return IndexPathIterator(self, index_path)
def jsonize_path_iterator(self, iter, strip=0):
'''
yield (elem1, elem2, l_no)
elem1, elem2 = None, None
else:
- # index2 is less
- yield (None, elem2, l_no)
- elem2 = None
+ if index2.count('/') > index1.count('/'):
+ yield (elem1, None, l_no)
+ elem1 = None
+ else:
+ yield (None, elem2, l_no)
+ elem2 = None
def restore_backup(self, target_path, backup_indexes_paths=[],
backup_tar_path=None):
'''
Handles the new volumes
'''
- volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
+ volume_name = deltarobj.volume_name_func(backup_path, True,
+ volume_number, guess_name=True)
volume_path = os.path.join(backup_path, volume_name)
# we convert relative paths into absolute because CWD is changed
tarobj.open_volume(volume_path)
backup_path = os.path.dirname(backup_tar_path)
+ if not os.path.isabs(backup_path):
+ backup_path = os.path.join(cwd, backup_path)
new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
tarobj = tarfile.TarFile.open(backup_tar_path,
mode='r' + self.mode,
os.chdir(target_path)
helper = RestoreHelper(self, cwd, backup_indexes_paths)
- index_it = helper._data[0]['iterator']
+ index_it = self.iterate_index_path(helper._data[0]["path"])
dir_it = self._recursive_walk_dir('.')
dir_path_it = self.jsonize_path_iterator(dir_it)
- index_it = sorted(index_it, key=lambda x: x[0]['path'])
- dir_path_it = sorted(dir_path_it, key=lambda x: x['path'])
- helper._data[0]['iterator'] = iter(index_it)
-
# for each file to be in the backup, do:
- for ipath, dpath, l_no in self.collate_iterators(iter(index_it), iter(dir_path_it)):
+ for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
if not ipath:
upath = dpath['path']
op_type = dpath['type']
# if file not found in dpath, we can directly restore from index
if not dpath:
+ # if the file doesn't exist and it needs to be deleted, it
+ # means that work is already done
+ if ipath['path'].startswith('delete://'):
+ continue
helper.restore(ipath, l_no)
continue
continue
# we have to restore the file, but first we need to delete the
- # current existing file
- helper.delete(upath)
+ # current existing file.
+ # we don't delete the file if it's a directory, because it might
+ # just have changed mtime, so it's quite improductive to remove
+ # it
+ if ipath and (ipath['type'] != 'directory' or
+ ipath['path'].startswith('delete://')):
+ helper.delete(upath)
if ipath:
helper.restore(ipath, l_no)
helper.restore_directories_permissions()
+ index_it.release()
os.chdir(cwd)
helper.cleanup()
self._directories = []
self._deltatar = deltatar
self._cwd = cwd
+ self._index_list = index_list
for index in index_list:
+ is_full = (index == index_list[-1])
+
# make paths absolute to avoid cwd problems
if not os.path.isabs(index):
index = os.path.join(cwd, index)
offset = -1,
tarobj = None,
path = index,
- iterator = deltatar.iterate_index_path(index),
+ is_full = is_full,
new_volume_handler = partial(self.new_volume_handler,
- self._deltatar, self._cwd, index)
+ self._deltatar, self._cwd, is_full, os.path.dirname(index))
)
self._data.append(s)
if data['tarobj']:
data['tarobj'].close()
data['tarobj'] = None
- # TODO: ad a way to close the iterator fd
- data['iterator']
def delete(self, path):
'''
Restore the path from the appropiate backup. Receives the current path
from the first index iterator. itpath must be not null.
'''
- data = self._data[0]
path = itpath['path']
- upath = self._deltatar.unprefixed(path)
- # if path is found in the first index as to be deleted or snapshotted,
- # deal with it and finish
if path.startswith('delete://'):
- self.delete(self._deltatar.unprefixed(path))
+ # the file has previously been deleted already in restore_backup in
+ # all cases so we just need to finish
return
- elif path.startswith('snapshot://'):
+
+ data = self._data[0]
+ upath = self._deltatar.unprefixed(path)
+
+ # if path is found in the first index as to be snapshotted, deal with it
+ # and finish
+ if path.startswith('snapshot://'):
self.restore_file(itpath, data, path, l_no, self._deltatar.unprefixed(path))
return
cur_index = 1
while cur_index < len(self._data):
data = self._data[cur_index]
- listit = list(data['iterator'])
- it = iter(listit)
- data['iterator'] = iter(listit)
-
- # find the path in the index
- d = None
- l_no = None
- dpath = None
- while True:
- try:
- d, l_no = it.next()
- except StopIteration:
- break
+ # NOTE: we restart the iterator each time instead of reusing it
+ # because the iterator can be walked over completely multiple times,
+ # for example if one path if not found in one index and we have to
+ # go to the next index.
+ with self._deltatar.iterate_index_path(data["path"]) as it:
+ # find the path in the index
+ d = None
+ l_no = None
+ dpath = None
+ while True:
+ try:
+ d, l_no = it.next()
+ except StopIteration:
+ break
- dpath = self._deltatar.unprefixed(d.get('path', ''))
+ dpath = self._deltatar.unprefixed(d.get('path', ''))
- if upath == dpath:
- break
+ if upath == dpath:
+ break
- if not d:
- # file not found, so it's not in the index, so it must be
- # removed
- if cur_index == 0:
- self.delete(path)
+ if not d:
+ # file not found, so it's not in the index, so it must be
+ # removed
+ if cur_index == 0:
+ self.delete(path)
+ return
+ # this means that the path was found in the first index but
+ # not in a previous one, so something wrong happened.
+ else:
+ self._deltatar.logger.warn('Error restoring file %s from '
+ 'index, not found in index %s' % (path, data['path']))
+ return
+
+ if d.get('path', '').startswith('delete://'):
+ self._deltatar.logger.warn(('Strange thing happened, file '
+ '%s was listed in first index but deleted by another '
+ 'one. Path was ignored and untouched.') % path)
return
- # this means that the path was found in the first index but
- # not in a previous one, so something wrong happened.
- else:
- self._deltatar.logger.warn('Error restoring file %s from '
- 'index, not found in index %s' % (path, data['path']))
+ elif d.get('path', '').startswith('snapshot://'):
+ self.restore_file(d, data, path, l_no, dpath)
return
-
- if d.get('path', '').startswith('delete://'):
- self._deltatar.logger.warn(('Strange thing happened, file '
- '%s was listed in first index but deleted by another '
- 'one. Path was ignored and untouched.') % path)
- return
- elif d.get('path', '').startswith('snapshot://'):
- self.restore_file(d, data, path, l_no, dpath)
- return
- elif d.get('path', '').startswith('list://'):
- continue
+ elif d.get('path', '').startswith('list://'):
+ continue
self._deltatar.logger.warn(('Error restoring file %s from index, '
'snapshot not found in any index') % path)
except tarfile.ExtractError, e:
self._deltatar.logger.warn('tarfile: %s' % e)
- @classmethod
- def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
+ @staticmethod
+ def new_volume_handler(deltarobj, cwd, is_full, backup_path, tarobj, base_name, volume_number):
'''
Handles the new volumes
'''
- volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
+ volume_name = deltarobj.volume_name_func(backup_path, is_full,
+ volume_number, guess_name=True)
volume_path = os.path.join(backup_path, volume_name)
# we convert relative paths into absolute because CWD is changed
if index_data['curr_vol_no'] != vol_no:
index_data['curr_vol_no'] = vol_no
backup_path = os.path.dirname(index_data['path'])
- vol_name = self._deltatar.volume_name_func(backup_path, True, vol_no)
+ vol_name = self._deltatar.volume_name_func(backup_path,
+ index_data['is_full'], vol_no, guess_name=True)
vol_path = os.path.join(backup_path, vol_name)
if index_data['vol_fd']:
index_data['vol_fd'].close()
member = copy.copy(member)
member.mode = 0700
+ # if it's an existing directory, we then don't need to recreate it
+ # just set the right permissions, mtime and that kind of stuff
+ if os.path.exists(member.path):
+ return
+
# finally, restore the file
index_data['tarobj'].extract(member)
(u'./small', u'./small'),
(u'./test', u'./test'),
(None, u'./zzzz'),
+ (None, u'./bigdir/a'),
+ (None, u'./bigdir/b'),
(u'./test/huge', u'./test/huge'),
(u'./test/huge2', u'./test/huge2'),
(u'./test/test2', u'./test/test2'),
- (None, u'./bigdir/a'),
- (None, u'./bigdir/b')
]
os.chdir(cwd)
prev_index_path)
# check index items
- index_path = os.path.join("backup_dir2", prev_index_filename)
+ index_path = os.path.join("backup_dir2",
+ deltatar.index_name_func(is_full=False))
index_it = deltatar.iterate_index_path(index_path)
n = 0
for i in index_it:
assert os.path.exists("backup_dir2")
shutil.rmtree("source_dir")
- tar_filename = deltatar.volume_name_func('backup_dir2', True, 0)
+ tar_filename = deltatar.volume_name_func('backup_dir2',
+ is_full=False, volume_number=0)
tar_path = os.path.join("backup_dir2", tar_filename)
# no file restored, because the diff was empty
prev_index_path)
# check index items
- index_path = os.path.join("backup_dir2", prev_index_filename)
+ index_path = os.path.join("backup_dir2", deltatar.index_name_func(is_full=False))
index_it = deltatar.iterate_index_path(index_path)
- l = sorted([i[0]['path'] for i in index_it])
- assert l == sorted([
+ l = [i[0]['path'] for i in index_it]
+
+ assert l == [
'list://./big',
'snapshot://./bigdir',
'delete://./small',
'list://./test',
'snapshot://./zzzz',
+ 'snapshot://./bigdir/a',
+ 'snapshot://./bigdir/b',
'list://./test/huge',
'list://./test/huge2',
'list://./test/test2',
- 'snapshot://./bigdir/a',
- 'snapshot://./bigdir/b'
- ])
+ ]
# check the tar file
assert os.path.exists("backup_dir2")
os.mkdir("source_dir")
open("source_dir/small", 'w').close()
- tar_filename = deltatar.volume_name_func('backup_dir2', True, 0)
+ tar_filename = deltatar.volume_name_func('backup_dir2',
+ is_full=False, volume_number=0)
tar_path = os.path.join("backup_dir2", tar_filename)
# restore the backup, this will create only the new files
prev_index_path)
# apply diff backup in target_dir
- index_filename = deltatar.index_name_func(is_full=True)
+ index_filename = deltatar.index_name_func(is_full=False)
index_path = os.path.join("backup_dir2", index_filename)
deltatar.restore_backup("target_dir",
backup_indexes_paths=[index_path, prev_index_path])
prev_index_path)
# first restore initial backup in target_dir
- tar_filename = deltatar.volume_name_func('backup_dir', True, 0)
+ tar_filename = deltatar.volume_name_func('backup_dir', is_full=True, volume_number=0)
tar_path = os.path.join("backup_dir", tar_filename)
deltatar.restore_backup("target_dir", backup_tar_path=tar_path)
# then apply diff backup in target_dir
- index_filename = deltatar.index_name_func(is_full=True)
+ index_filename = deltatar.index_name_func(is_full=False)
index_path = os.path.join("backup_dir2", index_filename)
deltatar.restore_backup("target_dir",
backup_indexes_paths=[index_path, prev_index_path])
prev_index_path)
# first restore initial backup in target_dir
- tar_filename = deltatar.volume_name_func('backup_dir', True, 0)
+ tar_filename = deltatar.volume_name_func('backup_dir', is_full=True, volume_number=0)
tar_path = os.path.join("backup_dir", tar_filename)
deltatar.restore_backup("target_dir", backup_tar_path=tar_path)
self.check_equal_dirs('source_dir', 'target_dir', deltatar)
# then apply diff backup in target_dir
- index_filename = deltatar.index_name_func(is_full=True)
+ index_filename = deltatar.index_name_func(is_full=False)
index_path = os.path.join("backup_dir2", index_filename)
deltatar.restore_backup("target_dir",
backup_indexes_paths=[index_path, prev_index_path])
self.check_equal_dirs('source_dir', 'target_dir', deltatar)
# then apply diff backup in target_dir
- index_filename = deltatar.index_name_func(is_full=True)
+ index_filename = deltatar.index_name_func(is_full=False)
index_path = os.path.join("backup_dir2", index_filename)
deltatar.restore_backup("target_dir",
backup_indexes_paths=[index_path, prev_index_path])