"""Read from and write to tar format archives.
"""
+I2N_XXX_ENCRYPTION_VERSION = 0
+
__version__ = "$Revision: 85213 $"
# $Source$
#---------
# Imports
#---------
+import binascii
import sys
import os
import io
import re
import operator
+import traceback # XXX
+
from . import crypto
try:
dst.write(buf)
if len(buf) < remainder:
raise OSError("end of file reached")
- return
+
def filemode(mode):
"""Deprecated in this location; use stat.filemode."""
class DecryptionError(TarError):
"""Exception for error during decryption."""
pass
+class EncryptionError(TarError):
+ """Exception for error during encryption."""
+ pass
#---------------------------
# internal stream interface
def __init__(self, name, mode):
_mode = {
"r": os.O_RDONLY,
- "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
+ "w": os.O_RDWR | os.O_CREAT | os.O_TRUNC,
}[mode]
if hasattr(os, "O_BINARY"):
_mode |= os.O_BINARY # pylint: disable=no-member
def tell(self):
return self.offset
+ def seek_set (self, pos):
+ """Move the file descriptor to the absolute position ``pos`` and record
+ that position as the current offset."""
+ os.lseek (self.fd, pos, os.SEEK_SET)
+ self.offset = pos
+
class _Stream:
"""Class that serves as an adapter between TarFile and
_Stream is intended to be used only internally.
"""
+ remainder = -1 # track size in encrypted entries
+
def __init__(self, name, mode, comptype, fileobj, bufsize,
- concat_stream=False, encver=None, password="",
- compresslevel=9):
+ concat_stream=False, encver=None, password=None,
+ nacl=None, compresslevel=9):
"""Construct a _Stream object.
"""
self._extfileobj = True
self.flags = 0
self.internal_pos = 0
self.concat_stream = concat_stream
- self.encver = encver
- self.password = password
self.last_block_offset = 0
self.dbuf = b"" # ???
- self.aes_buf = b"" # ???
self.exception = None
self.compresslevel = compresslevel
self.bytes_written = 0
+ # crypto parameters
+ self.encver = encver
+ self.password = password
+ self.encryption = None
+ self.lasthdr = None
try:
if comptype == "gz":
raise CompressionError("zlib module is not available")
self.zlib = zlib
if mode == "r":
+ self._init_read_gz()
if self.encver is not None:
+ if password is None:
+ raise InvalidEncryptionError \
+ ("encryption requested (v=%d) but no password given"
+ % encver)
try:
- enc = crypto.Crypto (crypto.DECRYPT, pw, nacl, 1)
+ enc = crypto.Decrypt (password)
except ValueError as exn:
raise InvalidEncryptionError \
- ("ctor failed (%r, <PASSWORD>, “%s”, %r)"
- % (crypto.DECRYPT, nacl, 1))
+ ("ctor failed crypto.Decrypt(<PASSWORD>)")
self.encryption = enc
- self._init_read_gz()
- self.exception = zlib.error
else:
+ if self.encver is None:
+ # Layers are stacked differently: initialization is
+ # necessary per file.
+ if password is None:
+ raise InvalidEncryptionError \
+ ("encryption requested (v=%d) but no password given"
+ % encver)
+ try:
+ enc = crypto.Encrypt (password, I2N_XXX_ENCRYPTION_VERSION, nacl)
+ except ValueError as exn:
+ raise InvalidEncryptionError \
+ ("ctor failed crypto.Encrypt(<PASSWORD>, “%s”, %r)"
+ % (nacl, 1))
+ self.encryption = enc
self._init_write_gz()
+ self.exception = zlib.error # XXX what for? seems unused
self.crc = zlib.crc32(b"") & 0xFFFFffff
elif comptype == "bz2":
raise InvalidEncryptionError("encryption version %r not "
"available for compression %s"
% (encver, comptype))
- try:
- import lzma
- except ImportError:
- raise CompressionError("lzma module is not available")
- if mode == "r":
- self.dbuf = b""
- self.cmp = lzma.LZMADecompressor()
- self.exception = lzma.LZMAError
- else:
- self.cmp = lzma.LZMACompressor()
+ try:
+ import lzma
+ except ImportError:
+ raise CompressionError("lzma module is not available")
+ if mode == "r":
+ self.dbuf = b""
+ self.cmp = lzma.LZMADecompressor()
+ self.exception = lzma.LZMAError
+ else:
+ self.cmp = lzma.LZMACompressor()
+
+ elif comptype != "tar":
+ if self.encver is not None:
+ raise InvalidEncryptionError("encryption version %r not "
+ "available for compression %s"
+ % (encver, comptype))
+ raise CompressionError("unknown compression type %r" % comptype)
- elif self.encver is not None:
+ else: # no compression
if mode == "r":
+ if password is None:
+ raise InvalidEncryptionError \
+ ("encryption requested (v=%d) but no password given"
+ % encver)
try:
- enc = crypto.Crypto (crypto.DECRYPT, pw, nacl, 1)
+ enc = crypto.Decrypt (password)
except ValueError as exn:
raise InvalidEncryptionError \
- ("ctor failed (%r, <PASSWORD>, “%s”, %r)"
- % (crypto.DECRYPT, nacl, 1))
+ ("ctor failed crypto.Decrypt(<PASSWORD>)")
self.encryption = enc
- else:
+ elif mode == "w":
+ if password is None:
+ raise InvalidEncryptionError \
+ ("encryption requested (v=%d) but no password given"
+ % encver)
try:
- enc = crypto.Crypto (crypto.ENCRYPT, pw, nacl, 1)
+ enc = crypto.Encrypt (password, I2N_XXX_ENCRYPTION_VERSION, nacl)
except ValueError as exn:
raise InvalidEncryptionError \
- ("ctor failed (%r, <PASSWORD>, “%s”, %r)"
- % (crypto.DECRYPT, nacl, 1))
+ ("ctor failed crypto.Encrypt(<PASSWORD>, “%s”, %r)"
+ % (nacl, 1))
self.encryption = enc
- self.encryption.init()
- self.__write_to_file(self.encryption.salt_str)
- elif comptype != "tar":
- if self.encver is not None:
- raise InvalidEncryptionError("encryption version %r not "
- "available for compression %s"
- % (encver, comptype))
- raise CompressionError("unknown compression type %r" % comptype)
-
- except:
+ except: # XXX seriously?
if not self._extfileobj:
self.fileobj.close()
self.closed = True
if hasattr(self, "closed") and not self.closed:
self.close()
+
+ def _init_write_encrypt (self, entry=None):
+ """Start a new encrypted object: announce ``entry`` to the encryption
+ context, save the current stream position in ``self.lasthdr`` for the
+ delayed write of the header, and fill the header location with the
+ dummy header returned by the context.
+
+ Does nothing if the stream has no encryption context. Raises
+ EncryptionError if the context returns no dummy header."""
+ if self.encryption is not None:
+ # first thing, proclaim new object to the encryption context
+ # secondly, assemble the header with the updated parameters
+ # and commit it directly to the underlying stream, bypassing the
+ # encryption layer in .__write().
+ dummyhdr = self.encryption.next (entry,
+ version=DELTATAR_HEADER_VERSION,
+ paramversion=self.encver,
+ nacl=self.encryption.nacl)
+ if dummyhdr is None:
+ raise EncryptionError ("Crypto.next(): bad dummy header") # XXX
+
+ # remember where the placeholder starts so it can be overwritten later
+ self.lasthdr = self.fileobj.tell()
+ self.__write_to_file(dummyhdr)
+
+
+ def _finalize_write_encrypt (self):
+ """
+ Seek back to header position, read dummy bytes, finalize crypto
+ obtaining the actual header, write header, seek back to current
+ position.
+ """
+ if self.encryption is not None \
+ and self.lasthdr is not None :
+ self.__sync ()
+ pos0 = self.fileobj.tell ()
+ self.fileobj.seek_set (self.lasthdr)
+ dummy = self.fileobj.read (crypto.I2N_HDR_SIZE)
+ pos1 = self.fileobj.tell ()
+ dpos = pos1 - self.lasthdr
+ assert dpos == crypto.I2N_HDR_SIZE
+ data, hdr = self.encryption.done (dummy)
+ self.fileobj.seek_set (self.lasthdr)
+ self.__write_to_file(hdr)
+ self.fileobj.seek_set (pos0)
+ self.__write_to_file(data) # append remainder of data
+ self.lasthdr = -1
+
+
def _init_write_gz(self):
"""Initialize for writing with gzip compression.
"""
+ if getattr(self, "zlib", None) is None:
+ return
self.cmp = self.zlib.compressobj(self.compresslevel,
self.zlib.DEFLATED,
-self.zlib.MAX_WBITS,
self.zlib.DEF_MEM_LEVEL,
0)
-
- # if aes, we encrypt after compression
- if self.encver is not None:
- hdr = crypto.hdr_from_params \
- (version=DELTATAR_HEADER_VERSION,
- paramversion=self.encver,
- nacl=self.encryption.salt_str,
- iv=self.encryption.iv,
- ctsize=self.encryption.ctsize)
- self.__write_to_file(hdr)
-
timestamp = struct.pack("<L", int(time.time()))
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
if type(self.name) is str:
self.name = self.name.encode("iso-8859-1", "replace")
- if self.name.endswith(b".aes128") or self.name.endswith(b".aes256"):
+ if self.name.endswith(b".aes128"):
self.name = self.name[:-7]
if self.name.endswith(b".gz"):
self.name = self.name[:-3]
# RFC1952 says we must use ISO-8859-1 for the FNAME field.
self.__write(self.name + NUL)
+
def new_compression_block(self):
'''
Used to notify a new tar block is coming to create a new zip block
raise CompressionError("new compression blocks can only be added in mode 'w'")
if self.comptype == "gz":
self._new_gz_block(True)
- elif self.encver is not None:
- self._new_aes_block(True)
+ elif self.encryption is not None:
+ pass # XXX
+ #self._new_aes_block(True)
else:
raise CompressionError("Concat compression only available for comptype 'gz'")
0)
# if aes, we encrypt after compression
- if self.encver is not None:
- self._new_aes_block(set_last_block_offset)
+ if self.encryption is not None:
+ #self._new_aes_block(set_last_block_offset)
+ raise Exception ("XXX sorry, no can do")
elif set_last_block_offset:
self.last_block_offset = self.fileobj.tell()
self.__write(b"\037\213\010\000" + timestamp + b"\002\377")
+ # !!! THIS DOES **NOT** HANDLE ACTUAL AES BLOCKS WHICH HAVE A FIXED
+ # !!! SIZE OF 16 BYTES
def _new_aes_block(self, set_last_block_offset=False):
+ # TODO do kill this off along with the entirety of aescrypto.py
# this basically checks if it comes from new_compression_block() call,
# in which case we have to call to close
if self.comptype == "tar":
self.close(close_fileobj=False)
self.closed = False
- data, tag = self.encryption.next (aad=self.name)
- if data is not None or tag is not None:
- self.__write_to_file(data)
- self.__write_to_file(tag)
-
- if set_last_block_offset:
+ if set_last_block_offset: # XXX does this belong before the header or after?
self.last_block_offset = self.fileobj.tell()
- hdr = crypto.hdr_from_params \
- (version=DELTATAR_HEADER_VERSION,
- paramversion=self.encver,
- nacl=self.encryption.salt_str,
- iv=self.encryption.iv,
- ctsize=self.encryption.ctsize)
+ hdr = self.encryption.next (self.name,
+ version=DELTATAR_HEADER_VERSION,
+ paramversion=self.encver,
+ nacl=self.encryption.nacl)
+ if hdr is None:
+ raise EncryptionError ("Crypto.next(): bad header")
self.__write_to_file(hdr)
def write(self, s):
s = self.cmp.compress(s)
self.__write(s)
+ def __sync(self):
+ """Write what’s left in the buffer to the stream and empty the buffer."""
+ self.__write (b"") # → len (buf) <= bufsiz
+ self.__enc_write (self.buf)
+ self.buf = b""
+
def __write(self, s):
"""Writes (and encodes) string s to the stream blockwise
the file
'''
tow = s
- if self.encver is not None:
+ if self.encryption is not None:
tow = self.encryption.process(s)
self.__write_to_file(tow)
result += len(self.buf)
if self.comptype == 'gz':
result += 8 # 2 longs = 8 byte (no extra info written for bzip2)
- if self.encver is not None:
+ if self.encryption is not None:
result += self.encryption.bs # (salt was already written at start)
return result
chunk += struct.pack("<L", self.crc & 0xffffffff)
chunk += struct.pack("<L", self.concat_pos & 0xffffFFFF)
self.__enc_write(chunk)
+ finalize_e = getattr (self, "_finalize_write_encrypt", None)
+ finalize_c = getattr (self, "_finalize_write_gz" , None)
+ if finalize_c is not None: finalize_c ()
+ if finalize_e is not None: finalize_e ()
if close_fileobj and not self._extfileobj:
- if self.encver is not None and self.mode == "w":
- self.__write_to_file(self.encryption.close_enc())
self.fileobj.close()
# read the zlib crc and length and check them
def _init_read_gz(self):
"""Initialize for reading a gzip compressed fileobj.
"""
+ if getattr(self, "zlib", None) is None:
+ return
self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
# taken from gzip.GzipFile with some alterations
if flag & 2:
self.__read(2)
+
+ def _init_read_encrypt (self):
+ """Initialize decryption for the next entry in the archive: remember the
+ current position in ``self.lasthdr``, read a header from the underlying
+ file object, store its ``ctsize`` field in ``self.remainder`` and notify
+ the crypto context.
+
+ Returns the result of the context’s ``next()`` call, or None if the
+ stream has no encryption context. Raises DecryptionError if reading the
+ header fails."""
+ if self.encryption is not None:
+ self.lasthdr = self.fileobj.tell ()
+ ok, hdr = crypto.hdr_read_stream (self.fileobj)
+ if ok is False:
+ # on failure the second element is used as the error description
+ raise DecryptionError ("Crypto.hdr_read_stream(): error “%s” "
+ "processing %r" % (hdr, self.fileobj))
+ self.remainder = hdr ["ctsize"] # distance to next header
+ return self.encryption.next (hdr)
+
+
+ def _finalize_read_encrypt (self):
+ """
+ Finalize decryption.
+ """
+ if self.encryption is not None \
+ and self.lasthdr is not None :
+ assert self.remainder >= 0
+ if self.remainder > 0:
+ # should only be happening with the last item in the archive
+ #zeroes = self.fileobj.read (self.remainder)
+ #pred_all_zero = lambda c: c == 0
+ #if all (pred_all_zero (c) for c in zeroes) is False:
+ # # this isn’t the last two blocks; something is messed up
+ # raise DecryptionError ("trailing blocks of %d B contain "
+ # "non-zero bytes" % self.remainder)
+ #assert len (zeroes) == self.remainder
+ self.remainder = 0
+ ok, data = self.encryption.done ()
+ if ok is False: # XXX handle error
+ print(">> !!!!!!! %s" % data)
+ print(">> bad tag %s" % self.encryption.tag)
+ return data
+
+
def tell(self):
"""Return the stream's file pointer position.
"""
t = [self.dbuf]
l_buf = self.bufsize # not mutated
- if self.encver is not None:
- tag1 = None # carry if spanning block bounds
- l_tag = crypto.I2N_TLR_SIZE_TAG
- b_tag = size - l_tag
- e_tag = size
-
while c < size:
buf = self.__read(l_buf)
if not buf:
break
- if self.encver is not None:
- tag = None
- r = len (buf)
- cr = c + r
- rem = size - cr
-
- if tag1 is not None: # read rest of tag
- tag = tag1 + buf
- assert len (tag) == l_tag
- elif rem == 0:
- split = l_buf - l_tag
- ctxt = buf [0 : split]
- tag = buf [split : ]
- elif rem < 0:
- if r == l_tag: # read entire tag only
- ctxt = None
- tag = buf
- else: # r > l_tag
- split = r - l_buf
- ctxt = buf [0 : split]
- tag = buf [split : ]
- elif cr > b_tag: # rem > 0 ∧ tag bleeding into next block
- got = cr - b_tag
- split = r - got
- ctxt = buf [0 : split]
- tag1 = buf [split : ] # see “scope” above
- else: # entire buffer is data
- ctxt = buf
-
- if ctxt is not None:
- ok, buf = self.encryption.process_chunk (ctxt)
- if ok is False:
- raise
- raise DecryptionError("error decrypting [%d:%d)"
- % (c, cr))
- if tag is not None:
- try:
- ok, ret, _ = self.encryption.done (tag)
- except cryptography.InvalidTag as exn:
- raise DecryptionError("authentication tag mismatch")
- if ok is False:
- raise DecryptionError("error finalizing stream: %s"
- % ret)
- break # tag valid ∧ no further data
-
if self.comptype != "tar":
try:
buf = self.cmp.decompress(buf)
"""
c = len(self.buf)
t = [self.buf]
+ i = 0
while c < size:
- buf = self.fileobj.read(size)
- if not buf:
- ## XXX stream terminated prematurely; this should be an error
+ i += 1
+ todo = size
+ if self.encryption is not None:
+ if self.remainder <= 0:
+ # prepare next object
+ self._init_read_encrypt ()
+ # only read up to the end of the encrypted object
+ todo = min (size, self.remainder)
+ buf = self.fileobj.read(todo)
+ if self.encryption is not None:
+ # decrypt the thing
+ buf = self.encryption.process (buf)
+ if todo == self.remainder:
+ # at the end of a crypto object; finalization will fail if
+ # the GCM tag does not match
+ trailing = self._finalize_read_encrypt ()
+ if len (trailing) > 0:
+ buf += trailing
+ self.remainder = 0
+ else:
+ self.remainder -= todo
+
+ if not buf: ## XXX stream terminated prematurely; this should be an error
break
+
t.append(buf)
c += len(buf)
self.internal_pos += len(buf)
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
errors="surrogateescape", pax_headers=None, debug=None,
errorlevel=None, max_volume_size=None, new_volume_handler=None,
- concat_compression=False, password='', save_to_members=True):
+ concat_compression=False, password='', nacl=None,
+ save_to_members=True):
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
read from an existing archive, 'a' to append data to an existing
file or 'w' to create a new file overwriting an existing one. `mode'
self.mode = mode
self.concat_compression = concat_compression
self.password = password
+ self.nacl = nacl
self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
if not fileobj:
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE,
- compresslevel=9, **kwargs):
+ compresslevel=9, **kwargs):
"""Open a tar archive for reading, writing or appending. Return
an appropriate TarFile class.
'r#gz.aes128' open an aes128 encrypted stream of gzip compressed tar blocks for reading
'w#gz.aes128' open an aes128 encrypted stream of gzip compressed tar blocks for writing
- 'r#gz.aes256' open an aes256 encrypted stream of gzip compressed tar blocks for reading
- 'w#gz.aes256' open an aes256 encrypted stream of gzip compressed tar blocks for writing
-
'r#aes128' open an aes128 encrypted stream of tar blocks for reading
'w#aes128' open an aes128 encrypted stream of tar blocks for writing
"""
try:
return func(name, "r", fileobj, **kwargs)
except (ReadError, CompressionError) as e:
+ # usually nothing exceptional but sometimes is
if fileobj is not None:
fileobj.seek(saved_pos)
continue
filemode, comptype = mode.split("#", 1)
filemode = filemode or "r"
password = ''
+ nacl = None
encver = None
if filemode not in "rw":
# encryption suffix after the compression type, e.g. gz.aes128
if "." in comptype:
comptype, _ = comptype.split(".", 1)
- encver = 1 # XXX set dynamically
+ encver = I2N_XXX_ENCRYPTION_VERSION # XXX set dynamically
password = kwargs.get('password', '')
if not password:
raise ValueError("you should give a password for encryption")
if comptype.startswith("aes"):
comptype = 'tar'
- encver = 1 # XXX set from mode or whatever
- password = kwargs.get('password', '')
- if not password:
+ encver = I2N_XXX_ENCRYPTION_VERSION # XXX set dynamically
+ password = kwargs.get ("password")
+ if password is None:
raise ValueError("you should give a password for encryption")
kwargs['concat_compression'] = True
stream = _Stream(name, filemode, comptype, fileobj, bufsize,
concat_stream=True, encver=encver,
- password=password, compresslevel=compresslevel)
+ password=password, nacl=nacl,
+ compresslevel=compresslevel)
try:
t = cls(name, filemode, stream, **kwargs)
- except:
+ except: # XXX except what?
stream.close()
- raise
+ raise # XXX raise what?
t._extfileobj = False
return t
blocks, remainder = divmod(self.offset, RECORDSIZE)
if remainder > 0:
self.fileobj.write(NUL * (RECORDSIZE - remainder))
-
if not self._extfileobj:
self.fileobj.close()
self.closed = True
if target_size_left < BLOCKSIZE:
target_size_left = BLOCKSIZE
+ # below attributes aren’t present with other compression methods
+ init_e = getattr (self.fileobj, "_init_write_encrypt", None)
+ init_c = getattr (self.fileobj, "_init_write_gz" , None)
+ finalize_e = getattr (self.fileobj, "_finalize_write_encrypt", None)
+ finalize_c = getattr (self.fileobj, "_finalize_write_gz" , None)
+
+ def new_item_hook (): # crypto is outer, compress is inner
+ """Finalize the compression and encryption layers, then initialize
+ them again for ``tarinfo.name``; hooks that are None are skipped."""
+ # We cannot finalize symmetrically after encryption because
+ # tar(5) mandates a trailer of “two records consisting entirely
+ # of zero bytes” which the stream appends as part of the
+ # .close() operation.
+ if finalize_c is not None: finalize_c ()
+ if finalize_e is not None: finalize_e ()
+ if init_e is not None: init_e (tarinfo.name)
+ if init_c is not None: init_c ()
+
# loop over multiple volumes
while source_size_left > 0:
size_can_write = min(target_size_left, source_size_left)
while size_can_write > 0:
+ new_item_hook ()
copyfileobj(fileobj, self.fileobj, size_can_write)
self.offset += size_can_write
source_size_left -= size_can_write
"""
source = self.fileobj
source.seek(tarinfo.offset_data)
+ decrypt = False
iterate = True
target = bltn_open(targetpath, "wb")
iterate = False
try:
copyfileobj(source, target, tarinfo.size)
- except IOError:
+ except IOError: # on *any* IOError even in the target?
source.close()
# only if we are extracting a multivolume this can be treated
if not self.new_volume_handler:
tarinfo = self.firstmember
source = self.fileobj
iterate = True
-
target.close()