properly restart compression when encrypting
author Philipp Gesang <philipp.gesang@intra2net.com>
Mon, 24 Apr 2017 13:04:38 +0000 (15:04 +0200)
committer Thomas Jarosch <thomas.jarosch@intra2net.com>
Mon, 2 Apr 2018 11:34:08 +0000 (13:34 +0200)
Separate finalization of a zlib block from creation of a new one.
Otherwise, we end up with trailing data from the last object that
lingers in the write buffer and gets flushed to the archive after
the next encrypted object has been initialized.

Also get rid of the “new_compression_block” wrapper which
needlessly complicated things.

Special precautions must be taken for the PAX format. Because it
requires a global archive header, TarFile writes to the stream
before the initialization performed in addfile() has taken place.
Thus, initialize compression before the PAX header is written and
properly restart compression for the first object committed to the
archive or volume.

deltatar/deltatar.py
deltatar/tarfile.py
testing/__init__.py

index 26375e8..6f21340 100644 (file)
@@ -534,7 +534,7 @@ class DeltaTar(object):
 
         sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
                                bufsize=tarfile.RECORDSIZE, fileobj=None,
-                               encryption=self.crypto_ctx, noinit=True)
+                               encryption=self.crypto_ctx)
         if self.crypto_ctx is not None and mode == "w":
             counter = None
             if kind == AUXILIARY_FILE_INFO:
index 13e092c..508f5a3 100644 (file)
@@ -418,8 +418,7 @@ class _Stream:
     remainder = -1 # track size in encrypted entries
 
     def __init__(self, name, mode, comptype, fileobj, bufsize,
-                 concat_stream=False, encryption=None, compresslevel=9,
-                 noinit=False):
+                 concat_stream=False, encryption=None, compresslevel=9):
         """Construct a _Stream object.
         """
         self._extfileobj = True
@@ -467,8 +466,8 @@ class _Stream:
                     if concat_stream is True:
                         self._init_read_gz()
                 elif mode == "w":
-                    if noinit is False:
-                        self._new_gz_block()
+                    if self.encryption is None and concat_stream is False:
+                        self._init_write_gz ()
                 self.crc = zlib.crc32(b"") & 0xFFFFffff
 
             elif comptype == "bz2":
@@ -575,25 +574,10 @@ class _Stream:
                 self.buf = b""
 
 
-    def new_compression_block(self):
-        '''
-        Used to notify a new tar block is coming to create a new zip block
-        '''
-        if self.mode != "w":
-            raise CompressionError("new compression blocks can only be added in mode 'w'")
-        if self.comptype == "gz":
-            self._new_gz_block(True)
-        elif self.encryption is not None:
-            pass
-        else:
-            raise CompressionError("Concat compression only available for comptype 'gz'")
-
-    def _new_gz_block(self, set_last_block_offset=False):
+    def _init_write_gz (self, set_last_block_offset=False):
         '''
         Add a new gzip block, closing last one
         '''
-        self.close(close_fileobj=False)
-        self.closed = False
         self.concat_pos = 0
         self.crc = self.zlib.crc32(b"") & 0xFFFFffff
         first = False
@@ -606,9 +590,7 @@ class _Stream:
                                          0)
 
         # if aes, we encrypt after compression
-        if self.encryption is not None:
-            pass
-        elif set_last_block_offset:
+        if self.encryption is None and set_last_block_offset:
             self.last_block_offset = self.fileobj.tell()
 
         self.__write(gz_header (self.name if first is True else None))
@@ -695,12 +677,11 @@ class _Stream:
         if self.closed:
             return
 
-        if self.mode == "w":
-            self._finalize_write_gz ()
-            self.__enc_write(self.buf)
-
         if close_fileobj is True:
+
             if self.mode == "w":
+                if self.cmp is not None:
+                    self._finalize_write_gz ()
                 # end of Tar archive marker (two empty blocks) was written
                 self._finalize_write_encrypt ()
             if not self._extfileobj:
@@ -2443,12 +2424,15 @@ class TarFile(object):
 
         tarinfo = copy.copy(tarinfo)
 
+        if self.concat_compression is True:
+            self.fileobj._finalize_write_gz ()
+
         if getattr (self.fileobj, "encryption", None) is not None:
             self.fileobj._finalize_write_encrypt ()
             self.fileobj._init_write_encrypt (tarinfo.name)
 
         if self.concat_compression:
-            self.fileobj.new_compression_block()
+            self.fileobj._init_write_gz (True)
             self.last_block_offset = self.fileobj.last_block_offset
         else:
             self.last_block_offset = self.fileobj.tell()
@@ -2502,9 +2486,6 @@ class TarFile(object):
                 # Only finalize the crypto entry here if we’re continuing with
                 # another one; otherwise, the encryption must include the block
                 # padding below.
-                if getattr (self.fileobj, "encryption", None) is not None:
-                    self.fileobj.close (close_fileobj=True)
-
                 tarinfo.type = GNUTYPE_MULTIVOL
 
                 if not self.new_volume_handler or\
@@ -2523,14 +2504,19 @@ class TarFile(object):
                 tarinfo.volume_offset = tarinfo.size - source_size_left
                 self.volume_tarinfo = tarinfo
 
+                # the “new_volume_handler” is supposed to call .close() on the
+                # “fileobj” _Stream
                 self.new_volume_handler(self, self.base_name, self.volume_number)
+                if getattr (self.fileobj, "cmp", None) is not None:
+                    # e. g. compressed PAX header written
+                    self.fileobj._finalize_write_gz ()
 
                 self.volume_tarinfo = None
 
                 if getattr (self.fileobj, "encryption", None) is not None:
                     self.fileobj._init_write_encrypt (tarinfo.name)
-                if self.concat_compression:
-                    self.fileobj.new_compression_block()
+                if getattr (self.fileobj, "cmp", None) is not None:
+                    self.fileobj._init_write_gz ()
 
                 # write new volume header
                 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
@@ -2628,6 +2614,8 @@ class TarFile(object):
 
                     self.pax_headers.update(volume_info)
 
+                    if isinstance(self.fileobj, _Stream):
+                        self.fileobj._init_write_gz ()
                     buf = self.tarinfo.create_pax_global_header(volume_info.copy())
                     self.fileobj.write(buf)
                     self.offset += len(buf)
index 1553324..3c71341 100644 (file)
@@ -52,7 +52,7 @@ class BaseTest(unittest.TestCase):
         '''
         Remove temporal files created by unit tests
         '''
-        os.system("rm -rf big big2 small small2 sample.*")
+        os.system("rm -rf big big2 small small2 sample.* pdtcrypt-object-*.bin")
 
     def create_file(self, path, length):
         '''