From fb27c6e888a011ddf37e5bcdbde8995a3d29b752 Mon Sep 17 00:00:00 2001 From: Eduardo Robles Elvira Date: Fri, 27 Sep 2013 16:06:58 +0200 Subject: [PATCH] Fixing corner case where pad was not taken into account when decrypting the end of a file in a stream __dec_read function reads directly from the file and returns the data decrypted. This means that if the file is not encrypted, this function is trivial. If the data in the file is encrypted, then the process is different: first we have to read the raw encrypted data, then decrypt it and return. But the decryption process is not straightforward because the self.fileobj stream contains multiple encrypted files one after the other. We need to detect each separate file, which is detected because they are separated by the "Salted__" keyword. It gets more complicated, because we decrypt chunk by chunk, and to correctly decrypt one chunk we need to set a "last" variable that specifies if it's the last chunk of a file, because the end of a file is handled differently, as it gets padded. Knowing if the current chunk is the last part of a file is usually done just by detecting if it's followed by a "Salted__" keyword or if we cannot read more bytes from the stream. BUT there's a pretty particular case, in which the current chunk ends exactly with one file, so that the next chunk starts with "Salted__". To fix that rare case, we just read N bytes from the stream, and check if the last bytes correspond with the string "Salted__". Then we save those last characters for the next call to __dec_read. If the last bytes were "Salted__", then we set "last" to True. Well, actually we not only subtract the length of "Salted__", but 16/32 chars because the file is decrypted in multiples of the key size. 
--- deltatar/tarfile.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 files changed, 51 insertions(+), 5 deletions(-) diff --git a/deltatar/tarfile.py b/deltatar/tarfile.py index 36c42af..ba8737b 100644 --- a/deltatar/tarfile.py +++ b/deltatar/tarfile.py @@ -438,7 +438,8 @@ class _Stream: self.key_length = key_length self.password = password self.last_block_offset = 0L - self.dbuf = "" + self.dbuf = "" + self.aes_buf = "" if comptype == "gz": try: @@ -760,14 +761,57 @@ class _Stream: self.internal_pos += len(buf) t = "".join(t) self.buf = t[size:] + return t[:size] def __dec_read(self, size): - buf = self.fileobj.read(size) - last = len(buf) < size + ''' + This function reads directly from the file and returns the data + decrypted. This means that if the file is not encrypted, this function + is trivial. + + If the data in the file is encrypted, then the process is different: + first we have to read the raw encrypted data, then decrypt it and + return. But the decryption process is not straightforward because the + self.fileobj stream contains multiple encrypted files one after the + other. We need to detect each separate file, which is detected because + they are separated by the "Salted__" keyword. + + It gets more complicated, because we decrypt chunk by chunk, and to + correctly decrypt one chunk we need to set a "last" variable that + specifies if it's the last chunk of a file, because the end of a file is + handled differently, as it gets padded. + + Knowing if the current chunk is the last part of a file is usually done + just by detecting if it's followed by a "Salted__" keyword or if we + cannot read more bytes from the stream. BUT there's a pretty particular + case, in which the current chunk ends exactly with one file, so that + the next chunk starts with "Salted__". + + To fix that rare case, we just read N bytes from the stream, and check + if the last bytes correspond with the string "Salted__". 
Then we save + those last characters for next call to __dec_read. If the last bytes + were "Salted__", then we set "last" to True. + + Well, actually we not only substract the length of "Salted__", but 16/32 + chars because the file is decrypted in multiples of the key size. + ''' if self.enctype == 'aes': - buf = self.__split_enc_file(buf, last) - return buf + kl = self.key_length/8 + buf = self.fileobj.read(size - kl) + last = len(buf) < (size - kl) + buf = self.aes_buf + buf + self.aes_buf = "" + + # prevent setting last to False when it shouldn't + if not last: + last = buf[-kl:].startswith('Salted__') + self.aes_buf = buf[-kl:] + buf = buf[:-kl] + + return self.__split_enc_file(buf, last) + else: + return self.fileobj.read(size) def __split_enc_file(self, buf, last): if not buf: @@ -784,6 +828,7 @@ class _Stream: buf = self.encryption.decrypt(b1, True) else: buf = '' + self.encryption.get_salt_str(b2) self.encryption.init() b2 = b2[len(self.encryption.salt_str):] @@ -2952,6 +2997,7 @@ class TarIter: else: raise StopIteration self.index += 1 + return tarinfo # Helper classes for sparse file support -- 1.7.1