Fixing corner case where pad was not taken into account when decrypting the end of...

author Eduardo Robles Elvira <edulix@wadobo.com>

Fri, 27 Sep 2013 14:06:58 +0000 (16:06 +0200)

committer Eduardo Robles Elvira <edulix@wadobo.com>

Fri, 27 Sep 2013 14:06:58 +0000 (16:06 +0200)
author Eduardo Robles Elvira <edulix@wadobo.com>
Fri, 27 Sep 2013 14:06:58 +0000 (16:06 +0200)
committer Eduardo Robles Elvira <edulix@wadobo.com>
Fri, 27 Sep 2013 14:06:58 +0000 (16:06 +0200)
diff --git a/deltatar/tarfile.py b/deltatar/tarfile.py

index 36c42af..ba8737b 100644 (file)
--- a/deltatar/tarfile.py
+++ b/deltatar/tarfile.py
@@ -438,7 +438,8 @@ class _Stream:
         self.key_length = key_length
         self.password = password
         self.last_block_offset = 0L
-        self.dbuf = ""
+        self.dbuf     = ""
+        self.aes_buf  = ""
 
         if comptype == "gz":
             try:
@@ -760,14 +761,57 @@ class _Stream:
             self.internal_pos += len(buf)
         t = "".join(t)
         self.buf = t[size:]
+
         return t[:size]
 
     def __dec_read(self, size):
-        buf = self.fileobj.read(size)
-        last = len(buf) < size
+        '''
+        This function reads directly from the file and returns the data
+        decrypted. This means that if the file is not encrypted, this function
+        is trivial.
+
+        If the data in the file is encrypted, then the process is different:
+        first we have to read the raw encrypted data, then decrypt it and
+        return. But the decryption process is not straightforward because the
+        self.fileobj stream contains multiple encrypted files one after the
+        other. We need to detect each separate file, which is detected because
+        they are separated by the "Salted__" keyword.
+
+        It gets more complicated, because we decrypt chunk by chunk, and to
+        correctly decrypt one chunk we need to set a "last" variable that
+        specifies if it's the last chunk of a file, because the end of a file is
+        handled differently, as it gets padded.
+
+        Knowing if the current chunk is the last part of a file is usually done
+        just by detecting if it's followed by a "Salted__" keyword or if we
+        cannot read more bytes from the stream. BUT there is one particular
+        corner case: the current chunk ends exactly where one file ends, so
+        that the next chunk starts with "Salted__".
+
+        To fix that rare case, we just read N bytes from the stream, and check
+        if the last bytes start with the string "Salted__". Then we save
+        those last bytes for the next call to __dec_read. If the last bytes
+        started with "Salted__", then we set "last" to True.
+
+        More precisely, we hold back not just the length of "Salted__" but
+        16/32 bytes, because the file is decrypted in multiples of the key size.
+        '''
         if self.enctype == 'aes':
-            buf = self.__split_enc_file(buf, last)
-        return buf
+            kl = self.key_length/8
+            buf = self.fileobj.read(size - kl)
+            last = len(buf) < (size - kl)
+            buf = self.aes_buf + buf
+            self.aes_buf = ""
+
+            # avoid leaving last as False when the held-back tail starts a new file
+            if not last:
+                last = buf[-kl:].startswith('Salted__')
+                self.aes_buf = buf[-kl:]
+                buf = buf[:-kl]
+
+            return self.__split_enc_file(buf, last)
+        else:
+            return self.fileobj.read(size)
 
     def __split_enc_file(self, buf, last):
         if not buf:
@@ -784,6 +828,7 @@ class _Stream:
                 buf = self.encryption.decrypt(b1, True)
             else:
                 buf = ''
+
             self.encryption.get_salt_str(b2)
             self.encryption.init()
             b2 = b2[len(self.encryption.salt_str):]
@@ -2952,6 +2997,7 @@ class TarIter:
         else:
             raise StopIteration
         self.index += 1
+
         return tarinfo
 
 # Helper classes for sparse file support
author	Eduardo Robles Elvira <edulix@wadobo.com>
	Fri, 27 Sep 2013 14:06:58 +0000 (16:06 +0200)
committer	Eduardo Robles Elvira <edulix@wadobo.com>
	Fri, 27 Sep 2013 14:06:58 +0000 (16:06 +0200)