From: Philipp Gesang Date: Tue, 21 Mar 2017 12:33:16 +0000 (+0100) Subject: first draft of revised encryption layer X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=a9796633e081426ed5cdd1793acb9675b0f515a8;p=python-delta-tar first draft of revised encryption layer WIP. Tested for encryption (no compression) only, and only for TOC listings (-l). Decryption is handled entirely by the stream, traversing the archive entry-wise, relying only on the header information. Encryption requires poking the _Stream thingy to initiate a new crypto entry. --- diff --git a/deltatar/tarfile.py b/deltatar/tarfile.py index 4266a40..14b6b63 100644 --- a/deltatar/tarfile.py +++ b/deltatar/tarfile.py @@ -29,6 +29,8 @@ """Read from and write to tar format archives. """ +I2N_XXX_ENCRYPTION_VERSION = 0 + __version__ = "$Revision: 85213 $" # $Source$ @@ -41,6 +43,7 @@ __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend, Eduardo Robl #--------- # Imports #--------- +import binascii import sys import os import io @@ -53,6 +56,8 @@ import copy import re import operator +import traceback # XXX + from . 
import crypto try: @@ -271,7 +276,7 @@ def copyfileobj(src, dst, length=None): dst.write(buf) if len(buf) < remainder: raise OSError("end of file reached") - return + def filemode(mode): """Deprecated in this location; use stat.filemode.""" @@ -319,6 +324,9 @@ class InvalidEncryptionError(TarError): class DecryptionError(TarError): """Exception for error during decryption.""" pass +class EncryptionError(TarError): + """Exception for error during decryption.""" + pass #--------------------------- # internal stream interface @@ -332,7 +340,7 @@ class _LowLevelFile: def __init__(self, name, mode): _mode = { "r": os.O_RDONLY, - "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, + "w": os.O_RDWR | os.O_CREAT | os.O_TRUNC, }[mode] if hasattr(os, "O_BINARY"): _mode |= os.O_BINARY # pylint: disable=no-member @@ -354,6 +362,10 @@ class _LowLevelFile: def tell(self): return self.offset + def seek_set (self, pos): + os.lseek (self.fd, pos, os.SEEK_SET) + self.offset = pos + class _Stream: """Class that serves as an adapter between TarFile and @@ -366,9 +378,11 @@ class _Stream: _Stream is intended to be used only internally. """ + remainder = -1 # track size in encrypted entries + def __init__(self, name, mode, comptype, fileobj, bufsize, - concat_stream=False, encver=None, password="", - compresslevel=9): + concat_stream=False, encver=None, password=None, + nacl=None, compresslevel=9): """Construct a _Stream object. """ self._extfileobj = True @@ -394,14 +408,16 @@ class _Stream: self.flags = 0 self.internal_pos = 0 self.concat_stream = concat_stream - self.encver = encver - self.password = password self.last_block_offset = 0 self.dbuf = b"" # ??? - self.aes_buf = b"" # ??? 
self.exception = None self.compresslevel = compresslevel self.bytes_written = 0 + # crypto parameters + self.encver = encver + self.password = password + self.encryption = None + self.lasthdr = None try: if comptype == "gz": @@ -411,18 +427,35 @@ class _Stream: raise CompressionError("zlib module is not available") self.zlib = zlib if mode == "r": + self._init_read_gz() if self.encver is not None: + if password is None: + raise InvalidEncryptionError \ + ("encryption requested (v=%d) but no password given" + % encver) try: - enc = crypto.Crypto (crypto.DECRYPT, pw, nacl, 1) + enc = crypto.Decrypt (password) except ValueError as exn: raise InvalidEncryptionError \ - ("ctor failed (%r, , “%s”, %r)" - % (crypto.DECRYPT, nacl, 1)) + ("ctor failed crypto.Decrypt()") self.encryption = enc - self._init_read_gz() - self.exception = zlib.error else: + if self.encver is None: + # Layers are stacked differently: initialization is + # necessary per file. + if password is None: + raise InvalidEncryptionError \ + ("encryption requested (v=%d) but no password given" + % encver) + try: + enc = crypto.Encrypt (password, I2N_XXX_ENCRYPTION_VERSION, nacl) + except ValueError as exn: + raise InvalidEncryptionError \ + ("ctor failed crypto.Encrypt(, “%s”, %r)" + % (nacl, 1)) + self.encryption = enc self._init_write_gz() + self.exception = zlib.error # XXX what for? 
seems unused self.crc = zlib.crc32(b"") & 0xFFFFffff elif comptype == "bz2": @@ -446,45 +479,50 @@ class _Stream: raise InvalidEncryptionError("encryption version %r not " "available for compression %s" % (encver, comptype)) - try: - import lzma - except ImportError: - raise CompressionError("lzma module is not available") - if mode == "r": - self.dbuf = b"" - self.cmp = lzma.LZMADecompressor() - self.exception = lzma.LZMAError - else: - self.cmp = lzma.LZMACompressor() + try: + import lzma + except ImportError: + raise CompressionError("lzma module is not available") + if mode == "r": + self.dbuf = b"" + self.cmp = lzma.LZMADecompressor() + self.exception = lzma.LZMAError + else: + self.cmp = lzma.LZMACompressor() + + elif comptype != "tar": + if self.encver is not None: + raise InvalidEncryptionError("encryption version %r not " + "available for compression %s" + % (encver, comptype)) + raise CompressionError("unknown compression type %r" % comptype) - elif self.encver is not None: + else: # no compression if mode == "r": + if password is None: + raise InvalidEncryptionError \ + ("encryption requested (v=%d) but no password given" + % encver) try: - enc = crypto.Crypto (crypto.DECRYPT, pw, nacl, 1) + enc = crypto.Decrypt (password) except ValueError as exn: raise InvalidEncryptionError \ - ("ctor failed (%r, , “%s”, %r)" - % (crypto.DECRYPT, nacl, 1)) + ("ctor failed crypto.Decrypt()") self.encryption = enc - else: + elif mode == "w": + if password is None: + raise InvalidEncryptionError \ + ("encryption requested (v=%d) but no password given" + % encver) try: - enc = crypto.Crypto (crypto.ENCRYPT, pw, nacl, 1) + enc = crypto.Encrypt (password, I2N_XXX_ENCRYPTION_VERSION, nacl) except ValueError as exn: raise InvalidEncryptionError \ - ("ctor failed (%r, , “%s”, %r)" - % (crypto.DECRYPT, nacl, 1)) + ("ctor failed crypto.Encrypt(, “%s”, %r)" + % (nacl, 1)) self.encryption = enc - self.encryption.init() - self.__write_to_file(self.encryption.salt_str) - elif 
comptype != "tar": - if self.encver is not None: - raise InvalidEncryptionError("encryption version %r not " - "available for compression %s" - % (encver, comptype)) - raise CompressionError("unknown compression type %r" % comptype) - - except: + except: # XXX seriously? if not self._extfileobj: self.fileobj.close() self.closed = True @@ -494,36 +532,71 @@ class _Stream: if hasattr(self, "closed") and not self.closed: self.close() + + def _init_write_encrypt (self, entry=None): + """Save position for delayed write of header; fill the header location + with dummy bytes.""" + if self.encryption is not None: + # first thing, proclaim new object to the encryption context + # secondly, assemble the header with the updated parameters + # and commit it directly to the underlying stream, bypassing the + # encryption layer in .__write(). + dummyhdr = self.encryption.next (entry, + version=DELTATAR_HEADER_VERSION, + paramversion=self.encver, + nacl=self.encryption.nacl) + if dummyhdr is None: + raise EncryptionError ("Crypto.next(): bad dummy header") # XXX + + self.lasthdr = self.fileobj.tell() + self.__write_to_file(dummyhdr) + + + def _finalize_write_encrypt (self): + """ + Seek back to header position, read dummy bytes, finalize crypto + obtaining the actual header, write header, seek back to current + position. + """ + if self.encryption is not None \ + and self.lasthdr is not None : + self.__sync () + pos0 = self.fileobj.tell () + self.fileobj.seek_set (self.lasthdr) + dummy = self.fileobj.read (crypto.I2N_HDR_SIZE) + pos1 = self.fileobj.tell () + dpos = pos1 - self.lasthdr + assert dpos == crypto.I2N_HDR_SIZE + data, hdr = self.encryption.done (dummy) + self.fileobj.seek_set (self.lasthdr) + self.__write_to_file(hdr) + self.fileobj.seek_set (pos0) + self.__write_to_file(data) # append remainder of data + self.lasthdr = -1 + + def _init_write_gz(self): """Initialize for writing with gzip compression. 
""" + if getattr(self, "zlib", None) is None: + return self.cmp = self.zlib.compressobj(self.compresslevel, self.zlib.DEFLATED, -self.zlib.MAX_WBITS, self.zlib.DEF_MEM_LEVEL, 0) - - # if aes, we encrypt after compression - if self.encver is not None: - hdr = crypto.hdr_from_params \ - (version=DELTATAR_HEADER_VERSION, - paramversion=self.encver, - nacl=self.encryption.salt_str, - iv=self.encryption.iv, - ctsize=self.encryption.ctsize) - self.__write_to_file(hdr) - timestamp = struct.pack("= 0 + if self.remainder > 0: + # should only be happening with the last item in the archive + #zeroes = self.fileobj.read (self.remainder) + #pred_all_zero = lambda c: c == 0 + #if all (pred_all_zero (c) for c in zeroes) is False: + # # this isn’t the last two blocks; something is messed up + # raise DecryptionError ("trailing blocks of %d B contain " + # "non-zero bytes" % self.remainder) + #assert len (zeroes) == self.remainder + self.remainder = 0 + ok, data = self.encryption.done () + if ok is False: # XXX handle error + print(">> !!!!!!! %s" % data) + print(">> bad tag %s" % self.encryption.tag) + return data + + def tell(self): """Return the stream's file pointer position. 
""" @@ -790,62 +911,11 @@ class _Stream: t = [self.dbuf] l_buf = self.bufsize # not mutated - if self.encver is not None: - tag1 = None # carry if spanning block bounds - l_tag = crypto.I2N_TLR_SIZE_TAG - b_tag = size - l_tag - e_tag = size - while c < size: buf = self.__read(l_buf) if not buf: break - if self.encver is not None: - tag = None - r = len (buf) - cr = c + r - rem = size - cr - - if tag1 is not None: # read rest of tag - tag = tag1 + buf - assert len (tag) == l_tag - elif rem == 0: - split = l_buf - l_tag - ctxt = buf [0 : split] - tag = buf [split : ] - elif rem < 0: - if r == l_tag: # read entire tag only - ctxt = None - tag = buf - else: # r > l_tag - split = r - l_buf - ctxt = buf [0 : split] - tag = buf [split : ] - elif cr > b_tag: # rem > 0 ∧ tag bleeding into next block - got = cr - b_tag - split = r - got - ctxt = buf [0 : split] - tag1 = buf [split : ] # see “scope” above - else: # entire buffer is data - ctxt = buf - - if ctxt is not None: - ok, buf = self.encryption.process_chunk (ctxt) - if ok is False: - raise - raise DecryptionError("error decrypting [%d:%d)" - % (c, cr)) - if tag is not None: - try: - ok, ret, _ = self.encryption.done (tag) - except cryptography.InvalidTag as exn: - raise DecryptionError("authentication tag mismatch") - if ok is False: - raise DecryptionError("error finalizing stream: %s" - % ret) - break # tag valid ∧ no further data - if self.comptype != "tar": try: buf = self.cmp.decompress(buf) @@ -884,11 +954,33 @@ class _Stream: """ c = len(self.buf) t = [self.buf] + i = 0 while c < size: - buf = self.fileobj.read(size) - if not buf: - ## XXX stream terminated prematurely; this should be an error + i += 1 + todo = size + if self.encryption is not None: + if self.remainder <= 0: + # prepare next object + self._init_read_encrypt () + # only read up to the end of the encrypted object + todo = min (size, self.remainder) + buf = self.fileobj.read(todo) + if self.encryption is not None: + # decrypt the thing + buf = 
self.encryption.process (buf) + if todo == self.remainder: + # at the end of a crypto object; finalization will fail if + # the GCM tag does not match + trailing = self._finalize_read_encrypt () + if len (trailing) > 0: + buf += trailing + self.remainder = 0 + else: + self.remainder -= todo + + if not buf: ## XXX stream terminated prematurely; this should be an error break + t.append(buf) c += len(buf) self.internal_pos += len(buf) @@ -1788,7 +1880,8 @@ class TarFile(object): tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None, max_volume_size=None, new_volume_handler=None, - concat_compression=False, password='', save_to_members=True): + concat_compression=False, password='', nacl=None, + save_to_members=True): """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to read from an existing archive, 'a' to append data to an existing file or 'w' to create a new file overwriting an existing one. `mode' @@ -1802,6 +1895,7 @@ class TarFile(object): self.mode = mode self.concat_compression = concat_compression self.password = password + self.nacl = nacl self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode] if not fileobj: @@ -1911,7 +2005,7 @@ class TarFile(object): @classmethod def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, - compresslevel=9, **kwargs): + compresslevel=9, **kwargs): """Open a tar archive for reading, writing or appending. Return an appropriate TarFile class. 
@@ -1942,9 +2036,6 @@ class TarFile(object): 'r#gz.aes128' open an aes128 encrypted stream of gzip compressed tar blocks for reading 'w#gz.aes128' open an aes128 encrypted stream of gzip compressed tar blocks for writing - 'r#gz.aes256' open an aes256 encrypted stream of gzip compressed tar blocks for reading - 'w#gz.aes256' open an aes256 encrypted stream of gzip compressed tar blocks for writing - 'r#aes128' open an aes128 encrypted stream of tar blocks for reading 'w#aes128' open an aes128 encrypted stream of tar blocks for writing """ @@ -1961,6 +2052,7 @@ class TarFile(object): try: return func(name, "r", fileobj, **kwargs) except (ReadError, CompressionError) as e: + # usually nothing exceptional but sometimes is if fileobj is not None: fileobj.seek(saved_pos) continue @@ -2010,6 +2102,7 @@ class TarFile(object): filemode, comptype = mode.split("#", 1) filemode = filemode or "r" password = '' + nacl = None encver = None if filemode not in "rw": @@ -2023,28 +2116,29 @@ class TarFile(object): # encryption gz.aes128 or gz.aes256 if "." in comptype: comptype, _ = comptype.split(".", 1) - encver = 1 # XXX set dynamically + encver = I2N_XXX_ENCRYPTION_VERSION # XXX set dynamically password = kwargs.get('password', '') if not password: raise ValueError("you should give a password for encryption") if comptype.startswith("aes"): comptype = 'tar' - encver = 1 # XXX set from mode or whatever - password = kwargs.get('password', '') - if not password: + encver = I2N_XXX_ENCRYPTION_VERSION # XXX set dynamically + password = kwargs.get ("password") + if password is None: raise ValueError("you should give a password for encryption") kwargs['concat_compression'] = True stream = _Stream(name, filemode, comptype, fileobj, bufsize, concat_stream=True, encver=encver, - password=password, compresslevel=compresslevel) + password=password, nacl=nacl, + compresslevel=compresslevel) try: t = cls(name, filemode, stream, **kwargs) - except: + except: # XXX except what? 
stream.close() - raise + raise # XXX raise what? t._extfileobj = False return t @@ -2165,7 +2259,6 @@ class TarFile(object): blocks, remainder = divmod(self.offset, RECORDSIZE) if remainder > 0: self.fileobj.write(NUL * (RECORDSIZE - remainder)) - if not self._extfileobj: self.fileobj.close() self.closed = True @@ -2471,6 +2564,22 @@ class TarFile(object): if target_size_left < BLOCKSIZE: target_size_left = BLOCKSIZE + # below attributes aren’t present with other compression methods + init_e = getattr (self.fileobj, "_init_write_encrypt", None) + init_c = getattr (self.fileobj, "_init_write_gz" , None) + finalize_e = getattr (self.fileobj, "_finalize_write_encrypt", None) + finalize_c = getattr (self.fileobj, "_finalize_write_gz" , None) + + def new_item_hook (): # crypto is outer, compress is inner + # We cannot finalize symmetrically after encryption because + # tar(5) mandates a trailer of “two records consisting entirely + # of zero bytes” which the stream appends as part of the + # .close() operation. + if finalize_c is not None: finalize_c () + if finalize_e is not None: finalize_e () + if init_e is not None: init_e (tarinfo.name) + if init_c is not None: init_c () + # loop over multiple volumes while source_size_left > 0: @@ -2480,6 +2589,7 @@ class TarFile(object): size_can_write = min(target_size_left, source_size_left) while size_can_write > 0: + new_item_hook () copyfileobj(fileobj, self.fileobj, size_can_write) self.offset += size_can_write source_size_left -= size_can_write @@ -2802,6 +2912,7 @@ class TarFile(object): """ source = self.fileobj source.seek(tarinfo.offset_data) + decrypt = False iterate = True target = bltn_open(targetpath, "wb") @@ -2820,7 +2931,7 @@ class TarFile(object): iterate = False try: copyfileobj(source, target, tarinfo.size) - except IOError: + except IOError: # on *any* IOError even in the target? 
source.close() # only if we are extracting a multivolume this can be treated if not self.new_volume_handler: @@ -2836,7 +2947,6 @@ class TarFile(object): tarinfo = self.firstmember source = self.fileobj iterate = True - target.close()