do not discard valid data in buffers when in tolerant mode
author Philipp Gesang <philipp.gesang@intra2net.com>
Tue, 15 Aug 2017 08:31:15 +0000 (10:31 +0200)
committer Thomas Jarosch <thomas.jarosch@intra2net.com>
Mon, 2 Apr 2018 11:34:09 +0000 (13:34 +0200)
Both decryption and decompression will fail on the first error
and ignore any results of earlier passes. In normal operation,
the hard failures are desirable to indicate a bad backup set.

However, in tolerant / recovery mode the error handling is closer
to the opposite extreme: we want to retrieve every last byte that
made it through the various layers and only skip over the parts
that cannot be interpreted at all.

deltatar/crypto.py
deltatar/tarfile.py

index 930d1f9..16c50d1 100755 (executable)
@@ -818,6 +818,14 @@ class Crypto (object):
         return self.stats ["obj"], self.stats ["in"], self.stats ["out"]
 
 
+    def drop (self):
+        """
+        Clear the current context regardless of its finalization state. The
+        next operation must be ``.next()``.
+        """
+        self.enc = None
+
+
 class Encrypt (Crypto):
 
     lastinfo     = None
index 526c1ac..a57983f 100644 (file)
@@ -883,6 +883,19 @@ class _Stream:
         return True
 
 
+    def _read_encrypt (self, buf):
+        """
+        Demote a program error to a decryption error in tolerant mode. This
+        allows recovery from corrupted headers and invalid data.
+        """
+        try:
+            return self.encryption.process (buf)
+        except RuntimeError as exn:
+            if self.tolerant is True:
+                raise DecryptionError (exn)
+            raise
+
+
     def _finalize_read_encrypt (self):
         """
         Finalize decryption.
@@ -994,6 +1007,14 @@ class _Stream:
                     self.close(close_fileobj=False)
                     try:
                         self._init_read_gz()
+                    except DecryptionError:
+                        if self.tolerant is True:
+                            # return whatever data was processed successfully
+                            if len (buf) > 0:
+                                t.append (buf)
+                            if len (t) > 0:
+                                break
+                            raise
                     except EndOfFile:
                         # happens at the end of the file
                         pass
@@ -1016,50 +1037,46 @@ class _Stream:
         finalized object is returned.
         """
         c = len(self.buf)
-        t = [self.buf]
+        t = [self.buf] if c > 0 else []
         good_crypto = len (t)
+
         while c < size:
             todo = size
-            if self.arcmode & ARCMODE_ENCRYPT:
-                if self.remainder <= 0:
-                    # prepare next object
-                    try:
+            try:
+                if self.arcmode & ARCMODE_ENCRYPT:
+                    if self.remainder <= 0:
+                        # prepare next object
                         if self._init_read_encrypt () is False: # EOF
                             buf = None
                             break # while
-                    except DecryptionError:
-                        if self.tolerant is True:
-                            self.buf = b"".join (t [good_crypto:])
-                            return b"".join (t [:good_crypto])
-                        raise
-
-                # only read up to the end of the encrypted object
-                todo = min (size, self.remainder)
-            buf = self.fileobj.read(todo)
-            if self.arcmode & ARCMODE_ENCRYPT:
-                # decrypt the thing
-                buf = self.encryption.process (buf)
-                if todo == self.remainder:
-                    # at the end of a crypto object; finalization will fail if
-                    # the GCM tag does not match
-                    try:
+
+                    # only read up to the end of the encrypted object
+                    todo = min (size, self.remainder)
+                buf = self.fileobj.read(todo)
+                if self.arcmode & ARCMODE_ENCRYPT:
+                    # decrypt the thing
+                    buf = self._read_encrypt (buf)
+                    if todo == self.remainder:
+                        # at the end of a crypto object; finalization will fail if
+                        # the GCM tag does not match
                         trailing = self._finalize_read_encrypt ()
-                    except DecryptionError as exn:
-                        if self.tolerant is False:
-                            raise
-                        if good_crypto == 0:
-                            raise
-                        # some objects did validate; discard all data after it;
-                        # next call will start with the bad object and error
-                        # out immediately
-                        self.buf = b"".join (t [good_crypto:])
-                        return b"".join (t [:good_crypto])
-                    good_crypto = len (t) + 1
-                    if len (trailing) > 0:
-                        buf += trailing
-                    self.remainder = 0
-                else:
-                    self.remainder -= todo
+                        good_crypto = len (t) + 1
+                        if len (trailing) > 0:
+                            buf += trailing
+                        self.remainder = 0
+                    else:
+                        self.remainder -= todo
+            except DecryptionError:
+                if self.tolerant is False:
+                    raise
+                self.encryption.drop ()
+                if good_crypto == 0:
+                    raise
+                # this may occur at any of the three crypto operations above.
+                # some objects did validate; discard all data after it; next
+                # call will start with the bad object and error out immediately
+                self.buf = b"".join (t [good_crypto:])
+                return b"".join (t [:good_crypto])
 
             if not buf: ## XXX stream terminated prematurely; this should be an error
                 break