fix bug when restoring files: the mtime of the parent directory was not preserved/restored correctly
authorEduardo Robles Elvira <edulix@wadobo.com>
Sat, 28 Sep 2013 09:18:30 +0000 (11:18 +0200)
committerEduardo Robles Elvira <edulix@wadobo.com>
Sat, 28 Sep 2013 09:18:30 +0000 (11:18 +0200)
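
The fix saves the parent directory's mtime before deleting or extracting an
entry and puts it back with os.utime() afterwards, since creating or removing
entries in a directory bumps its mtime. A minimal sketch of the pattern
(hypothetical helper name, not the exact deltatar code):

    import os
    import shutil

    def remove_preserving_parent_mtime(path):
        # save the parent directory's mtime before touching its contents
        parent_dir = os.path.dirname(path) or '.'
        parent_mtime = int(os.stat(parent_dir).st_mtime)

        if os.path.isdir(path):
            shutil.rmtree(path)
        else:
            os.unlink(path)

        # removing the entry updated the parent's mtime; restore the saved value
        os.utime(parent_dir, (parent_mtime, parent_mtime))
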
deltatar/deltatar.py

index 3d624a2..a4cbbf8 100644 (file)
@@ -35,6 +35,8 @@ from . import tarfile
 class NullHandler(logging.Handler):
     def emit(self, record):
         pass
+
+
 logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
 
 
@@ -896,6 +898,67 @@ class DeltaTar(object):
 
         return IndexPathIterator(self, index_path)
 
+    def iterate_tar_path(self, tar_path):
+        '''
+        Returns an iterator over the members of the tar file, yielding each one
+        as a stat dict (same format as the index entries) with an additional
+        "member" field holding the TarInfo object, used by RestoreHelper.
+        '''
+        class TarPathIterator(object):
+            def __init__(self, delta_tar, tar_path):
+                self.delta_tar = delta_tar
+                self.tar_path = tar_path
+                self.tar_obj = None
+                self.__enter__()
+
+            def __iter__(self):
+                return self
+
+            def release(self):
+                if self.tar_obj:
+                    self.tar_obj.close()
+
+            def __enter__(self):
+                '''
+                Allows this iterator to be used with the "with" statement
+                '''
+                if self.tar_obj is None:
+                    self.tar_obj = tarfile.TarFile.open(self.tar_path,
+                        mode='r' + self.delta_tar.mode,
+                        format=tarfile.GNU_FORMAT,
+                        concat_compression='#gz' in self.delta_tar.mode,
+                        password=self.delta_tar.password)
+                return self
+
+            def __exit__(self, type, value, tb):
+                '''
+                Allows this iterator to be used with the "with" statement
+                '''
+                self.tar_obj.close()
+                self.tar_obj = None
+
+            def next(self):
+                '''
+                Read each member and return it as a stat dict
+                '''
+                tarinfo = self.tar_obj.next()
+                if not tarinfo:
+                    # no more members in the tar file: stop iterating
+                    raise StopIteration
+                self.last_member = tarinfo
+                ptype = 'unknown'
+                if tarinfo.isfile():
+                    ptype = 'file'
+                elif tarinfo.isdir():
+                    ptype = 'directory'
+                elif tarinfo.islnk() or tarinfo.issym():
+                    ptype = 'link'
+
+                return {
+                    u'type': ptype,
+                    u'path': tarinfo.path,
+                    u'mode': tarinfo.mode,
+                    u'mtime': tarinfo.mtime,
+                    u'ctime': -1, # cannot restore
+                    u'uid': tarinfo.uid,
+                    u'gid': tarinfo.gid,
+                    u'inode': -1, # cannot restore
+                    u'size': tarinfo.size,
+                    u'member': tarinfo
+                }
+        return TarPathIterator(self, tar_path)
+
     def jsonize_path_iterator(self, iter, strip=0):
         '''
         converts the yielded items of an iterator into json path lines.
@@ -1106,118 +1169,72 @@ class DeltaTar(object):
 
         cwd = os.getcwd()
 
-        # wraps some args from context into the handler
-
         if mode == 'tar':
-            def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
-                '''
-                Handles the new volumes
-                '''
-                volume_name = deltarobj.volume_name_func(backup_path, True,
-                    volume_number, guess_name=True)
-                volume_path = os.path.join(backup_path, volume_name)
-
-                # we convert relative paths into absolute because CWD is changed
-                if not os.path.isabs(volume_path):
-                    volume_path = os.path.join(cwd, volume_path)
-                tarobj.open_volume(volume_path)
-
-            backup_path = os.path.dirname(backup_tar_path)
-            if not os.path.isabs(backup_path):
-                backup_path = os.path.join(cwd, backup_path)
-            new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
-            tarobj = tarfile.TarFile.open(backup_tar_path,
-                                mode='r' + self.mode,
-                                format=tarfile.GNU_FORMAT,
-                                concat_compression='#gz' in self.mode,
-                                password=self.password,
-                                new_volume_handler=new_volume_handler)
-            os.chdir(target_path)
-
-            def filter(cls, tarinfo):
-                if tarinfo.path.startswith("snapshot://"):
-                    tarinfo.path = self.unprefixed(tarinfo.path)
-                    tarinfo.name = self.unprefixed(tarinfo.name)
-                    return cls.filter_path(tarinfo.path, '.', tarinfo.isdir()) != NO_MATCH
-                elif tarinfo.path.startswith("delete://"):
-                    path = self.unprefixed(tarinfo.path)
-                    if os.path.exists(path):
-                        if not os.path.isdir(path):
-                            os.unlink(path)
-                        else:
-                            shutil.rmtree(path)
-                    return False
-                else:
-                    return False
-            filter = partial(filter, self)
-
-            tarobj.extractall(filter=filter)
-            os.chdir(cwd)
-            tarobj.close()
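+            # in tar mode we iterate over the members of the backup tar itself;
+            # each member reaches RestoreHelper through the "member" field of
+            # the yielded stat dicts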
+            index_it = self.iterate_tar_path(backup_tar_path)
+            helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
+                                   tarobj=index_it.tar_obj)
         elif mode == "diff":
-            os.chdir(target_path)
             helper = RestoreHelper(self, cwd, backup_indexes_paths)
-
             index_it = self.iterate_index_path(helper._data[0]["path"])
-            dir_it = self._recursive_walk_dir('.')
-            dir_path_it = self.jsonize_path_iterator(dir_it)
-
-            # for each file to be in the backup, do:
-            for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
-                if not ipath:
-                    upath = dpath['path']
-                    op_type = dpath['type']
-                else:
-                    upath = self.unprefixed(ipath['path'])
-                    op_type = ipath['type']
 
-                # filter paths
-                if self.filter_path(upath, '.', op_type == 'directory') == NO_MATCH:
-                    continue
+        os.chdir(target_path)
+
+        dir_it = self._recursive_walk_dir('.')
+        dir_path_it = self.jsonize_path_iterator(dir_it)
 
-                # if types of the file mismatch, the file needs to be deleted
-                # and re-restored
-                if ipath is not None and dpath is not None and\
-                        dpath['type'] != ipath['type']:
-                    helper.delete(upath)
+        # for each file to be in the backup, do:
+        for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
+            if not ipath:
+                upath = dpath['path']
+                op_type = dpath['type']
+            else:
+                upath = self.unprefixed(ipath['path'])
+                op_type = ipath['type']
 
-                # if file not found in dpath, we can directly restore from index
-                if not dpath:
-                    # if the file doesn't exist and it needs to be deleted, it
-                    # means that work is already done
-                    if ipath['path'].startswith('delete://'):
-                        continue
-                    try:
-                        helper.restore(ipath, l_no)
-                    except Exception, e:
-                        print "FAILED to restore: ", ipath.get('path', '')
-                    continue
+            # filter paths
+            if self.filter_path(upath, '.', op_type == 'directory') == NO_MATCH:
+                continue
 
-                # if both files are equal, we have nothing to restore
-                if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
+            # if types of the file mismatch, the file needs to be deleted
+            # and re-restored
+            if ipath is not None and dpath is not None and\
+                    dpath['type'] != ipath['type']:
+                helper.delete(upath)
+
+            # if file not found in dpath, we can directly restore from index
+            if not dpath:
+                # if the file doesn't exist and it needs to be deleted, it
+                # means that work is already done
+                if ipath['path'].startswith('delete://'):
                     continue
-
-                # we have to restore the file, but first we need to delete the
-                # current existing file.
-                # we don't delete the file if it's a directory, because it might
-                # just have changed mtime, so it's quite inefficient to remove
-                # it
-                if ipath:
-                    if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
-                        helper.delete(upath)
+                try:
                     helper.restore(ipath, l_no)
+                except Exception, e:
+                    print "FAILED to restore: ", ipath.get('path', '')
+                continue
 
-                # if the file is not in the index (so it comes from the target
-                # directory) then we have to delete it
-                else:
+            # if both files are equal, we have nothing to restore
+            if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
+                continue
+
+            # we have to restore the file, but first we need to delete the
+            # current existing file.
+            # we don't delete the file if it's a directory, because it might
+            # just have changed mtime, so it's quite inefficient to remove
+            # it
+            if ipath:
+                if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
                     helper.delete(upath)
+                helper.restore(ipath, l_no)
+
+            # if the file is not in the index (so it comes from the target
+            # directory) then we have to delete it
+            else:
+                helper.delete(upath)
 
-            helper.restore_directories_permissions()
-            index_it.release()
-            os.chdir(cwd)
-            helper.cleanup()
+        helper.restore_directories_permissions()
+        index_it.release()
+        os.chdir(cwd)
+        helper.cleanup()
 
-
     def _parse_json_line(self, f, l_no):
         '''
         read from a file and parses a json line and prints it on screen on error
@@ -1231,6 +1248,7 @@ class DeltaTar(object):
                 "(line number %d): %s" % (l_no, l))
         return j, l_no
 
+
 class RestoreHelper(object):
     '''
     Class used to help to restore files from indices
@@ -1247,7 +1265,8 @@ class RestoreHelper(object):
     # tarfile.extractall for details.
     _directories = []
 
-    def __init__(self, deltatar, cwd, index_list):
+    def __init__(self, deltatar, cwd, index_list=[], backup_path=False,
+                 tarobj=None):
         '''
         Constructor opens the tars and init the data structures.
 
@@ -1259,25 +1278,47 @@ class RestoreHelper(object):
         self._cwd = cwd
         self._index_list = index_list
 
-        for index in index_list:
-            is_full = (index == index_list[-1])
-
+        if index_list:
+            for index in index_list:
+                is_full = (index == index_list[-1])
+
+                # make paths absolute to avoid cwd problems
+                if not os.path.isabs(index):
+                    index = os.path.normpath(os.path.join(cwd, index))
+
+                s = dict(
+                    curr_vol_no = None,
+                    vol_fd = None,
+                    offset = -1,
+                    tarobj = None,
+                    path = index,
+                    is_full = is_full,
+                    iterator = None,
+                    last_itelement = None,
+                    last_lno = 0,
+                    new_volume_handler = partial(self.new_volume_handler,
+                        self._deltatar, self._cwd, is_full,
+                        os.path.dirname(index))
+                )
+                self._data.append(s)
+        else:
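+            # no index list given: restore directly from a single backup tar,
+            # so there is only one entry in self._data and it is always full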
             # make paths absolute to avoid cwd problems
-            if not os.path.isabs(index):
-                index = os.path.join(cwd, index)
+            if not os.path.isabs(backup_path):
+                backup_path = os.path.normpath(os.path.join(cwd, backup_path))
 
             s = dict(
                 curr_vol_no = None,
                 vol_fd = None,
                 offset = -1,
-                tarobj = None,
-                path = index,
-                is_full = is_full,
+                tarobj = tarobj,
+                path = backup_path,
+                is_full = True,
                 iterator = None,
                 last_itelement = None,
                 last_lno = 0,
                 new_volume_handler = partial(self.new_volume_handler,
-                    self._deltatar, self._cwd, is_full, os.path.dirname(index))
+                    self._deltatar, self._cwd, True,
+                    os.path.dirname(backup_path))
             )
             self._data.append(s)
 
@@ -1297,11 +1338,18 @@ class RestoreHelper(object):
         if not os.path.exists(path):
             return
 
+        # to preserve parent directory mtime, we save it
+        parent_dir = os.path.dirname(path)
+        parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
+
         if os.path.isdir(path):
             shutil.rmtree(path)
         else:
             os.unlink(path)
 
+        # now we restore parent_directory mtime
+        os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
+
     def restore(self, itpath, l_no):
         '''
         Restore the path from the appropriate backup. Receives the current path
@@ -1317,10 +1365,17 @@ class RestoreHelper(object):
         data = self._data[0]
         upath = self._deltatar.unprefixed(path)
 
+        # to preserve parent directory mtime, we save it
+        parent_dir = os.path.dirname(upath)
+        parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
+
         # if path is found in the first index as to be snapshotted, deal with it
         # and finish
         if path.startswith('snapshot://'):
             self.restore_file(itpath, data, path, l_no, self._deltatar.unprefixed(path))
+
+            # now we restore parent_directory mtime
+            os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
             return
 
         # we go from index to index, finding the path in the index, then finding
@@ -1338,6 +1393,9 @@ class RestoreHelper(object):
                 # removed
                 if cur_index == 0:
                     self.delete(path)
+
+                    # now we restore parent_directory mtime
+                    os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
                     return
                 # this means that the path was found in the first index as listed
                 # not in a previous one, so something wrong happened.
@@ -1353,6 +1411,9 @@ class RestoreHelper(object):
                 return
             elif d.get('path', '').startswith('snapshot://'):
                 self.restore_file(d, data, path, l_no, dpath)
+
+                # now we restore parent_directory mtime
+                os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
                 return
             elif d.get('path', '').startswith('list://'):
                 continue
@@ -1453,50 +1514,55 @@ class RestoreHelper(object):
         '''
         Restores a snapshot of a file from a specific backup
         '''
-        vol_no = file_data.get('volume', -1)
         op_type = file_data.get('type', -1)
-
-        # sanity check
-        if not isinstance(vol_no, int) or vol_no < 0:
-            self._deltatar.logger.warn('unrecognized type to be restored: '
-                                       '%s, line %d' % (op_type, l_no))
-
-        # setup the volume that needs to be read
-        if index_data['curr_vol_no'] != vol_no:
-            index_data['curr_vol_no'] = vol_no
-            backup_path = os.path.dirname(index_data['path'])
-            vol_name = self._deltatar.volume_name_func(backup_path,
-                index_data['is_full'], vol_no, guess_name=True)
-            vol_path = os.path.join(backup_path, vol_name)
-            if index_data['vol_fd']:
-                index_data['vol_fd'].close()
-            index_data['vol_fd'] = open(vol_path, 'r')
-
-            # force reopen of the tarobj because of new volume
+        member = file_data.get('member', None)
+
+        # when member is set, it was read directly from the tar file being
+        # restored and can be extracted as-is; otherwise we have to locate the
+        # member through the index and the volume files first
+        if not member:
+            vol_no = file_data.get('volume', -1)
+            # sanity check
+            if not isinstance(vol_no, int) or vol_no < 0:
+                self._deltatar.logger.warn('unrecognized type to be restored: '
+                                        '%s, line %d' % (op_type, l_no))
+
+            # setup the volume that needs to be read
+            if index_data['curr_vol_no'] != vol_no:
+                index_data['curr_vol_no'] = vol_no
+                backup_path = os.path.dirname(index_data['path'])
+                vol_name = self._deltatar.volume_name_func(backup_path,
+                    index_data['is_full'], vol_no, guess_name=True)
+                vol_path = os.path.join(backup_path, vol_name)
+                if index_data['vol_fd']:
+                    index_data['vol_fd'].close()
+                index_data['vol_fd'] = open(vol_path, 'r')
+
+                # force reopen of the tarobj because of new volume
+                if index_data['tarobj']:
+                    index_data['tarobj'].close()
+                    index_data['tarobj'] = None
+
+            # seek tarfile if needed
+            offset = file_data.get('offset', -1)
             if index_data['tarobj']:
-                index_data['tarobj'].close()
-                index_data['tarobj'] = None
-
-        # seek tarfile if needed
-        offset = file_data.get('offset', -1)
-        if index_data['tarobj']:
-            member = index_data['tarobj'].next()
-            if not member or member.path != file_data['path']:
-                # force a seek and reopen
-                index_data['tarobj'].close()
-                index_data['tarobj'] = None
-
-        # open the tarfile if needed
-        if not index_data['tarobj']:
-            index_data['vol_fd'].seek(offset)
-            index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
-                fileobj=index_data['vol_fd'],
-                format=tarfile.GNU_FORMAT,
-                concat_compression='#gz' in self._deltatar.mode,
-                password=self._deltatar.password,
-                new_volume_handler=index_data['new_volume_handler'])
-
-            member = index_data['tarobj'].next()
+                member = index_data['tarobj'].next()
+                if not member or member.path != file_data['path']:
+                    # force a seek and reopen
+                    index_data['tarobj'].close()
+                    index_data['tarobj'] = None
+
+            # open the tarfile if needed
+            if not index_data['tarobj']:
+                index_data['vol_fd'].seek(offset)
+                index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
+                    fileobj=index_data['vol_fd'],
+                    format=tarfile.GNU_FORMAT,
+                    concat_compression='#gz' in self._deltatar.mode,
+                    password=self._deltatar.password,
+                    new_volume_handler=index_data['new_volume_handler'])
+
+                member = index_data['tarobj'].next()
 
         member.path = unprefixed
         member.name = unprefixed
@@ -1511,9 +1577,10 @@ class RestoreHelper(object):
             if os.path.exists(member.path):
                 return
 
-        # set current volume number in tarobj, otherwise the extraction of the
-        # file might fail when trying to extract a multivolume member
-        index_data['tarobj'].volume_number = index_data['curr_vol_no']
+        if file_data.get('member', None) is None:
+            # set current volume number in tarobj, otherwise the extraction of the
+            # file might fail when trying to extract a multivolume member
+            index_data['tarobj'].volume_number = index_data['curr_vol_no']
 
         # finally, restore the file
         index_data['tarobj'].extract(member)