draft disaster recovery mode for deltatar
author    Philipp Gesang <philipp.gesang@intra2net.com>
Tue, 8 Aug 2017 11:58:20 +0000 (13:58 +0200)
committer Thomas Jarosch <thomas.jarosch@intra2net.com>
Mon, 2 Apr 2018 11:34:09 +0000 (13:34 +0200)
The first-stage recovery assumes the index is intact and all
objects are at their expected positions. In this scenario, an
attempt is made to extract each object, keeping track of those
that were not readable and why.
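
From the API side the intended call sequence looks roughly like the sketch
below; only recover_backup() and its list of (path, exception) tuples come
from this change, while the DeltaTar constructor arguments and the paths are
illustrative placeholders.

    # Sketch only: constructor arguments and paths are placeholders.
    from deltatar.deltatar import DeltaTar

    deltatar = DeltaTar(mode="#gz")
    failed = deltatar.recover_backup("/path/to/restore/target",
                                     backup_indexes_paths=["/path/to/index.gz"])
    # recover_backup() returns one (path, exception) tuple per object that
    # could not be extracted.
    for path, exn in failed:
        print("unrecoverable: %s (%s)" % (path, exn))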

backup.py
deltatar/deltatar.py
deltatar/tarfile.py

index b9984c8..06d541d 100644
--- a/backup.py
+++ b/backup.py
@@ -94,6 +94,8 @@ if __name__ == "__main__":
                         help="Maximum volume size, in megabytes.")
     parser.add_argument("-r", "--restore", action='store_true',
                         help="Restore a backup.")
+    parser.add_argument("-R", "--recover", action='store_true',
+                        help="Restore a backup with an index file.")
     parser.add_argument("-f", "--full", action='store_true',
                         help="Create a full backup.")
     parser.add_argument("-d", "--diff", action='store_true',
@@ -143,6 +145,16 @@ if __name__ == "__main__":
             deltatar.restore_backup(args.targetpath, backup_indexes_paths=args.indexes)
         else:
             deltatar.restore_backup(args.targetpath, backup_tar_path=args.sourcepath)
+    elif args.recover:
+        failed = deltatar.recover_backup(args.targetpath,
+                                         backup_indexes_paths=args.indexes,
+                                         backup_tar_path=args.sourcepath)
+        if len (failed) > 0:
+            logger = logging.getLogger('deltatar.DeltaTar')
+            print ("%d files could not be restored:" % len (failed))
+            for i, f in enumerate (failed):
+                print ("   [%d] %s (%s)" % (i, f [0], f [1]))
+
     elif args.equals:
         check_equal_dirs(os.path.abspath(args.sourcepath), os.path.abspath(args.targetpath), deltatar)
     else:
@@ -151,3 +163,4 @@ if __name__ == "__main__":
               "--restore, --equals.\n", file=sys.stderr)
         parser.print_help(file=sys.stderr)
 
+
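The reporting branch added above can be exercised in isolation; the failed
list in this sketch is fabricated purely for illustration, but the loop and
format strings are the ones from the hunk.

    # Stand-alone demonstration of the failure report format (data is made up).
    failed = [("home/user/data.bin", IOError("CRC check failed")),
              ("home/user/notes.txt", ValueError("truncated object"))]
    if len (failed) > 0:
        print ("%d files could not be restored:" % len (failed))
        for i, f in enumerate (failed):
            print ("   [%d] %s (%s)" % (i, f [0], f [1]))
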
index b2d483f..ee5e41b 100644
--- a/deltatar/deltatar.py
+++ b/deltatar/deltatar.py
@@ -1292,7 +1292,8 @@ class DeltaTar(object):
         tarobj.close()
 
     def restore_backup(self, target_path, backup_indexes_paths=[],
-                       backup_tar_path=None, restore_callback=None):
+                       backup_tar_path=None, restore_callback=None,
+                       disaster=False):
         '''
         Restores a backup.
 
@@ -1316,6 +1317,9 @@ class DeltaTar(object):
 
         NOTE: Indices are assumed to follow the same format as the index_mode
         specified in the constructor.
+
+        Returns the list of files that could not be restored, if there were
+        any.
         '''
         # check/sanitize input
         if not isinstance(target_path, str):
@@ -1386,6 +1390,8 @@ class DeltaTar(object):
         dir_it = self._recursive_walk_dir('.')
         dir_path_it = self.jsonize_path_iterator(dir_it)
 
+        failed = [] # irrecoverable files
+
         # for each file to be restored, do:
         for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
             if not ipath:
@@ -1415,8 +1421,11 @@ class DeltaTar(object):
                     self.logger.debug("restore %s" % ipath['path'])
                     helper.restore(ipath, l_no, restore_callback)
                 except Exception as e:
+                    iipath = ipath.get ("path", "")
                     self.logger.error("FAILED to restore: {} ({})"
-                                      .format(ipath.get('path', ''), e))
+                                      .format(iipath, e))
+                    if disaster is True:
+                        failed.append ((iipath, e))
                 continue
 
             # if both files are equal, we have nothing to restore
@@ -1432,7 +1441,13 @@ class DeltaTar(object):
                 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
                     helper.delete(upath)
                 self.logger.debug("restore %s" % ipath['path'])
-                helper.restore(ipath, l_no, restore_callback)
+                try:
+                    helper.restore(ipath, l_no, restore_callback)
+                except Exception as e:
+                    if disaster is False:
+                        raise
+                    failed.append ((ipath.get ("path", ""), e))
+                    continue
 
             # if the file is not in the index (so it comes from the target
             # directory) then we have to delete it
@@ -1445,6 +1460,20 @@ class DeltaTar(object):
         os.chdir(cwd)
         helper.cleanup()
 
+        return failed
+
+
+    def recover_backup(self, target_path, backup_indexes_paths=[],
+                       backup_tar_path=None, restore_callback=None):
+        """
+        Walk the index, extracting objects in disaster mode. Bad files are
+        reported along with a reason.
+        """
+        return self.restore_backup(target_path, backup_tar_path=backup_tar_path,
+                                   backup_indexes_paths=backup_indexes_paths,
+                                   disaster=True)
+
+
     def _parse_json_line(self, f, l_no):
         '''
         Read line from file like object and process it as JSON.
@@ -1485,8 +1514,10 @@ class RestoreHelper(object):
     # tarfile.extractall for details.
     _directories = []
 
+    _disaster = False
+
     def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
-                 tarobj=None):
+                 tarobj=None, disaster=False):
         '''
         Constructor opens the tars and init the data structures.
 
@@ -1510,6 +1541,7 @@ class RestoreHelper(object):
         self._password = deltatar.password
         self._crypto_key = deltatar.crypto_key
         self._decryptors = []
+        self._disaster = disaster
 
         try:
             import grp, pwd
@@ -1640,7 +1672,10 @@ class RestoreHelper(object):
         # if path is found in the newest index as to be snapshotted, deal with it
         # and finish
         if path.startswith('snapshot://'):
-            self.restore_file(itpath, data, path, l_no, upath)
+            try:
+                self.restore_file(itpath, data, path, l_no, upath)
+            except Exception:
+                raise
 
             # now we restore parent_directory mtime
             os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
@@ -1808,12 +1843,11 @@ class RestoreHelper(object):
             if index_data['tarobj']:
                 try:
                     member = index_data['tarobj'].__iter__().__next__()
-                except tarfile.ReadError as exn:
-                    # Possibly corrupted archive; may still be recoverable
-                    # if offsets did not change.
-                    index_data['tarobj'].close()
-                    index_data['tarobj'] = None
-                    raise
+                except tarfile.DecryptionError:
+                    member = None
+                except tarfile.CompressionError:
+                    member = None
+
                 if not member or member.path != file_data['path']:
                     # force a seek and reopen
                     index_data['tarobj'].close()
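
Condensed, the per-entry error handling that restore_backup() now applies
follows this pattern (a simplified sketch with hypothetical names, not the
literal code):

    def restore_entries(entries, restore_one, disaster=False):
        """Sketch: collect failures instead of aborting when disaster is set."""
        failed = []                          # (path, exception) tuples
        for entry in entries:
            try:
                restore_one(entry)           # stand-in for helper.restore(...)
            except Exception as exn:
                if not disaster:
                    raise                    # normal restore: fail immediately
                failed.append((entry.get("path", ""), exn))
        return failed
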
index f628be2..288ac47 100644
--- a/deltatar/tarfile.py
+++ b/deltatar/tarfile.py
@@ -365,7 +365,7 @@ class DecryptionError(TarError):
     """Exception for error during decryption."""
     pass
 class EncryptionError(TarError):
-    """Exception for error during decryption."""
+    """Exception for error during encryption."""
     pass
 class EndOfFile(Exception):
     """Signal end of file condition when they’re not an error."""
@@ -465,13 +465,15 @@ class _Stream:
     """
 
     remainder = -1 # track size in encrypted entries
+    tolerant  = False
 
     def __init__(self, name, mode, comptype, fileobj, bufsize,
                  concat=False, encryption=None, enccounter=None,
-                 compresslevel=9):
+                 compresslevel=9, tolerant=False):
         """Construct a _Stream object.
         """
         self.arcmode = arcmode_set (concat, encryption, comptype)
+        self.tolerant = tolerant
 
         self._extfileobj = True
         if fileobj is None:
@@ -1028,6 +1030,8 @@ class _Stream:
                     try:
                         trailing = self._finalize_read_encrypt ()
                     except DecryptionError as exn:
+                        if self.tolerant is False:
+                            raise
                         if good_crypto == 0:
                             raise
                         # some objects did validate; discard all data after it;
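
The effect of the new tolerant flag can be summarized by this sketch
(hypothetical helper names; the two raise conditions mirror the hunk above):

    def finalize_read_tolerant(finalize_read, tolerant, good_crypto):
        """Sketch: propagate decryption errors unless tolerant mode allows
        keeping the objects that already validated."""
        try:
            return finalize_read()       # stand-in for self._finalize_read_encrypt()
        except Exception:                # DecryptionError in the real code
            if not tolerant:
                raise                    # strict mode: the error is fatal
            if good_crypto == 0:
                raise                    # nothing validated: still fatal
            return None                  # keep the good objects, drop the rest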