From: Eduardo Robles Elvira Date: Sat, 28 Sep 2013 09:18:30 +0000 (+0200) Subject: fixing bug when restoring files, mtime of parent dir was not preserved/restored correctly X-Git-Tag: v2.2~94 X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=24ddf0a26e7b19cc2de3220b243d7ba067ed19d2;p=python-delta-tar fixing bug when restoring files, mtime of parent dir was not preserved/restored correctly --- diff --git a/deltatar/deltatar.py b/deltatar/deltatar.py index 3d624a2..a4cbbf8 100644 --- a/deltatar/deltatar.py +++ b/deltatar/deltatar.py @@ -35,6 +35,8 @@ from . import tarfile class NullHandler(logging.Handler): def emit(self, record): pass + + logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler()) @@ -896,6 +898,67 @@ class DeltaTar(object): return IndexPathIterator(self, index_path) + def iterate_tar_path(self, tar_path): + ''' + Returns a tar iterator that iterates jsonized member items that contain + an additional "member" field, used by RestoreHelper. + ''' + def TarPathIterator(object): + def __init__(self, delta_tar, index_path): + self.delta_tar = delta_tar + self.index_path = index_path + self.tar_obj = None + self.__enter__() + + def __iter__(self): + return self + + def release(self): + if self.tar_obj: + self.tar_obj.close() + + def __enter__(self): + ''' + Allows this iterator to be used with the "with" statement + ''' + if self.tar_obj is None: + self.tar_obj = self.delta_tar.open_index(self.index_path, 'r') + return self + + def __exit__(self, type, value, tb): + ''' + Allows this iterator to be used with the "with" statement + ''' + self.tar_obj.close() + self.tar_obj = None + + def next(self): + ''' + Read each member and return it as a stat dict + ''' + self.last_member = tarinfo = self.tar_obj.next() + ptype = 'unknown' + if tarinfo.isfile(): + ptype = 'file' + elif tarinfo.isdir(): + ptype = 'dir' + elif tarinfo.islnk() or tarinfo.issym(): + ptype = 'link' + + return { + u'type': ptype, + u'path': tarinfo.path, + u'mode': tarinfo.mode, + u'mtime': tarinfo.mtime, + u'ctime': -1, # cannot restore + u'uid': tarinfo.uid, + u'gid': tarinfo.gid, + u'inode': -1, # cannot restore + u'size': tarinfo.size, + u'member': tarinfo + } + return TarPathIterator(self, tar_path) + def jsonize_path_iterator(self, iter, strip=0): ''' converts the yielded items of an iterator into json path lines. @@ -1106,118 +1169,72 @@ class DeltaTar(object): cwd = os.getcwd() - # wraps some args from context into the handler - if mode == 'tar': - def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number): - ''' - Handles the new volumes - ''' - volume_name = deltarobj.volume_name_func(backup_path, True, - volume_number, guess_name=True) - volume_path = os.path.join(backup_path, volume_name) - - # we convert relative paths into absolute because CWD is changed - if not os.path.isabs(volume_path): - volume_path = os.path.join(cwd, volume_path) - tarobj.open_volume(volume_path) - - backup_path = os.path.dirname(backup_tar_path) - if not os.path.isabs(backup_path): - backup_path = os.path.join(cwd, backup_path) - new_volume_handler = partial(new_volume_handler, self, cwd, backup_path) - tarobj = tarfile.TarFile.open(backup_tar_path, - mode='r' + self.mode, - format=tarfile.GNU_FORMAT, - concat_compression='#gz' in self.mode, - password=self.password, - new_volume_handler=new_volume_handler) - os.chdir(target_path) - - def filter(cls, tarinfo): - if tarinfo.path.startswith("snapshot://"): - tarinfo.path = self.unprefixed(tarinfo.path) - tarinfo.name = self.unprefixed(tarinfo.name) - return cls.filter_path(tarinfo.path, '.', tarinfo.isdir()) != NO_MATCH - elif tarinfo.path.startswith("delete://"): - path = self.unprefixed(tarinfo.path) - if os.path.exists(path): - if not os.path.isdir(path): - os.unlink(path) - else: - shutil.rmtree(path) - return False - else: - return False - filter = partial(filter, self) - - tarobj.extractall(filter=filter) - os.chdir(cwd) - tarobj.close() + index_it = self.iterate_tar_path(backup_tar_path) + helper = RestoreHelper(self, cwd, backup_path=backup_tar_path, + tarobj=index_it.tarobj) elif mode == "diff": - os.chdir(target_path) helper = RestoreHelper(self, cwd, backup_indexes_paths) - index_it = self.iterate_index_path(helper._data[0]["path"]) - dir_it = self._recursive_walk_dir('.') - dir_path_it = self.jsonize_path_iterator(dir_it) - - # for each file to be in the backup, do: - for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it): - if not ipath: - upath = dpath['path'] - op_type = dpath['type'] - else: - upath = self.unprefixed(ipath['path']) - op_type = ipath['type'] - # filter paths - if self.filter_path(upath, '.', op_type == 'directory') == NO_MATCH: - continue + dir_it = self._recursive_walk_dir('.') + dir_path_it = self.jsonize_path_iterator(dir_it) - # if types of the file mismatch, the file needs to be deleted - # and re-restored - if ipath is not None and dpath is not None and\ - dpath['type'] != ipath['type']: - helper.delete(upath) + # for each file to be in the backup, do: + for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it): + if not ipath: + upath = dpath['path'] + op_type = dpath['type'] + else: + upath = self.unprefixed(ipath['path']) + op_type = ipath['type'] - # if file not found in dpath, we can directly restore from index - if not dpath: - # if the file doesn't exist and it needs to be deleted, it - # means that work is already done - if ipath['path'].startswith('delete://'): - continue - try: - helper.restore(ipath, l_no) - except Exception, e: - print "FAILED to restore: ", ipath.get('path', '') - continue + # filter paths + if self.filter_path(upath, '.', op_type == 'directory') == NO_MATCH: + continue - # if both files are equal, we have nothing to restore - if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True): + # if types of the file mismatch, the file needs to be deleted + # and re-restored + if ipath is not None and dpath is not None and\ + dpath['type'] != ipath['type']: + helper.delete(upath) + + # if file not found in dpath, we can directly restore from index + if not dpath: + # if the file doesn't exist and it needs to be deleted, it + # means that work is already done + if ipath['path'].startswith('delete://'): continue - - # we have to restore the file, but first we need to delete the - # current existing file. - # we don't delete the file if it's a directory, because it might - # just have changed mtime, so it's quite inefficient to remove - # it - if ipath: - if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'): - helper.delete(upath) + try: helper.restore(ipath, l_no) + except Exception, e: + print "FAILED to restore: ", ipath.get('path', '') + continue - # if the file is not in the index (so it comes from the target - # directory) then we have to delete it - else: + # if both files are equal, we have nothing to restore + if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True): + continue + + # we have to restore the file, but first we need to delete the + # current existing file. + # we don't delete the file if it's a directory, because it might + # just have changed mtime, so it's quite inefficient to remove + # it + if ipath: + if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'): helper.delete(upath) + helper.restore(ipath, l_no) + + # if the file is not in the index (so it comes from the target + # directory) then we have to delete it + else: + helper.delete(upath) helper.restore_directories_permissions() index_it.release() os.chdir(cwd) helper.cleanup() - def _parse_json_line(self, f, l_no): ''' read from a file and parses a json line and prints it on screen on error @@ -1231,6 +1248,7 @@ class DeltaTar(object): "(line number %d): %s" % (l_no, l)) return j, l_no + class RestoreHelper(object): ''' Class used to help to restore files from indices @@ -1247,7 +1265,8 @@ class RestoreHelper(object): # tarfile.extractall for details. _directories = [] - def __init__(self, deltatar, cwd, index_list): + def __init__(self, deltatar, cwd, index_list=[], backup_path=False, + tarobj=None): ''' Constructor opens the tars and init the data structures. @@ -1259,25 +1278,47 @@ class RestoreHelper(object): self._cwd = cwd self._index_list = index_list - for index in index_list: - is_full = (index == index_list[-1]) - + if index_list: + for index in index_list: + is_full = (index == index_list[-1]) + + # make paths absolute to avoid cwd problems + if not os.path.isabs(index): + index = os.path.normpath(os.path.join(cwd, index)) + + s = dict( + curr_vol_no = None, + vol_fd = None, + offset = -1, + tarobj = None, + path = index, + is_full = is_full, + iterator = None, + last_itelement = None, + last_lno = 0, + new_volume_handler = partial(self.new_volume_handler, + self._deltatar, self._cwd, is_full, + os.path.dirname(index)) + ) + self._data.append(s) + else: # make paths absolute to avoid cwd problems - if not os.path.isabs(index): - index = os.path.join(cwd, index) + if not os.path.isabs(backup_path): + backup_path = os.path.normpath(os.path.join(cwd, backup_path)) s = dict( curr_vol_no = None, vol_fd = None, offset = -1, - tarobj = None, - path = index, - is_full = is_full, + tarobj = tarobj, + path = backup_path, + is_full = True, iterator = None, last_itelement = None, last_lno = 0, new_volume_handler = partial(self.new_volume_handler, - self._deltatar, self._cwd, is_full, os.path.dirname(index)) + self._deltatar, self._cwd, True, + os.path.dirname(backup_path)) ) self._data.append(s) @@ -1297,11 +1338,18 @@ class RestoreHelper(object): if not os.path.exists(path): return + # to preserve parent directory mtime, we save it + parent_dir = os.path.dirname(path) + parent_dir_mtime = int(os.stat(parent_dir).st_mtime) + if os.path.isdir(path): shutil.rmtree(path) else: os.unlink(path) + # now we restore parent_directory mtime + os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime)) + def restore(self, itpath, l_no): ''' Restore the path from the appropiate backup. Receives the current path @@ -1317,10 +1365,17 @@ class RestoreHelper(object): data = self._data[0] upath = self._deltatar.unprefixed(path) + # to preserve parent directory mtime, we save it + parent_dir = os.path.dirname(upath) + parent_dir_mtime = int(os.stat(parent_dir).st_mtime) + # if path is found in the first index as to be snapshotted, deal with it # and finish if path.startswith('snapshot://'): self.restore_file(itpath, data, path, l_no, self._deltatar.unprefixed(path)) + + # now we restore parent_directory mtime + os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime)) return # we go from index to index, finding the path in the index, then finding @@ -1338,6 +1393,9 @@ class RestoreHelper(object): # removed if cur_index == 0: self.delete(path) + + # now we restore parent_directory mtime + os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime)) return # this means that the path was found in the first index as listed # not in a previous one, so something wrong happened. @@ -1353,6 +1411,9 @@ class RestoreHelper(object): return elif d.get('path', '').startswith('snapshot://'): self.restore_file(d, data, path, l_no, dpath) + + # now we restore parent_directory mtime + os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime)) return elif d.get('path', '').startswith('list://'): continue @@ -1453,50 +1514,55 @@ class RestoreHelper(object): ''' Restores a snapshot of a file from a specific backup ''' - vol_no = file_data.get('volume', -1) op_type = file_data.get('type', -1) - - # sanity check - if not isinstance(vol_no, int) or vol_no < 0: - self._deltatar.logger.warn('unrecognized type to be restored: ' - '%s, line %d' % (op_type, l_no)) - - # setup the volume that needs to be read - if index_data['curr_vol_no'] != vol_no: - index_data['curr_vol_no'] = vol_no - backup_path = os.path.dirname(index_data['path']) - vol_name = self._deltatar.volume_name_func(backup_path, - index_data['is_full'], vol_no, guess_name=True) - vol_path = os.path.join(backup_path, vol_name) - if index_data['vol_fd']: - index_data['vol_fd'].close() - index_data['vol_fd'] = open(vol_path, 'r') - - # force reopen of the tarobj because of new volume + member = file_data.get('member', None) + + # when member is set, then we can assume everything is right and we + # just have to restore the path + if not member: + vol_no = file_data.get('volume', -1) + # sanity check + if not isinstance(vol_no, int) or vol_no < 0: + self._deltatar.logger.warn('unrecognized type to be restored: ' + '%s, line %d' % (op_type, l_no)) + + # setup the volume that needs to be read. only needed when member is + # not set + if not member and index_data['curr_vol_no'] != vol_no: + index_data['curr_vol_no'] = vol_no + backup_path = os.path.dirname(index_data['path']) + vol_name = self._deltatar.volume_name_func(backup_path, + index_data['is_full'], vol_no, guess_name=True) + vol_path = os.path.join(backup_path, vol_name) + if index_data['vol_fd']: + index_data['vol_fd'].close() + index_data['vol_fd'] = open(vol_path, 'r') + + # force reopen of the tarobj because of new volume + if index_data['tarobj']: + index_data['tarobj'].close() + index_data['tarobj'] = None + + # seek tarfile if needed + offset = file_data.get('offset', -1) if index_data['tarobj']: - index_data['tarobj'].close() - index_data['tarobj'] = None - - # seek tarfile if needed - offset = file_data.get('offset', -1) - if index_data['tarobj']: - member = index_data['tarobj'].next() - if not member or member.path != file_data['path']: - # force a seek and reopen - index_data['tarobj'].close() - index_data['tarobj'] = None - - # open the tarfile if needed - if not index_data['tarobj']: - index_data['vol_fd'].seek(offset) - index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode, - fileobj=index_data['vol_fd'], - format=tarfile.GNU_FORMAT, - concat_compression='#gz' in self._deltatar.mode, - password=self._deltatar.password, - new_volume_handler=index_data['new_volume_handler']) - - member = index_data['tarobj'].next() + member = index_data['tarobj'].next() + if not member or member.path != file_data['path']: + # force a seek and reopen + index_data['tarobj'].close() + index_data['tarobj'] = None + + # open the tarfile if needed + if not index_data['tarobj']: + index_data['vol_fd'].seek(offset) + index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode, + fileobj=index_data['vol_fd'], + format=tarfile.GNU_FORMAT, + concat_compression='#gz' in self._deltatar.mode, + password=self._deltatar.password, + new_volume_handler=index_data['new_volume_handler']) + + member = index_data['tarobj'].next() member.path = unprefixed member.name = unprefixed @@ -1511,9 +1577,10 @@ class RestoreHelper(object): if os.path.exists(member.path): return - # set current volume number in tarobj, otherwise the extraction of the - # file might fail when trying to extract a multivolume member - index_data['tarobj'].volume_number = index_data['curr_vol_no'] + if not member: + # set current volume number in tarobj, otherwise the extraction of the + # file might fail when trying to extract a multivolume member + index_data['tarobj'].volume_number = index_data['curr_vol_no'] # finally, restore the file index_data['tarobj'].extract(member)