explicitly construct zlib headers
author Philipp Gesang <philipp.gesang@intra2net.com>
Mon, 27 Mar 2017 14:06:18 +0000 (16:06 +0200)
committer Thomas Jarosch <thomas.jarosch@intra2net.com>
Mon, 2 Apr 2018 11:34:08 +0000 (13:34 +0200)
Get rid of the unnecessary literal byte strings. Commit

    5fdff89f4d9fa28e6b210d40d389680072651eb7

introduced headers for additional blocks, omitting the “original
file name” field that gzip sets by default.

deltatar/deltatar.py
deltatar/tarfile.py

index 5ff2893..f5493ce 100644 (file)
@@ -18,7 +18,7 @@
 
 # Author: Eduardo Robles Elvira <edulix@wadobo.com>
 
-I2N_XXX_ENCRYPTION_VERSION = 0
+I2N_XXX_ENCRYPTION_VERSION = 1
 
 import logging
 import datetime
@@ -499,6 +499,7 @@ class DeltaTar(object):
             comptype = 'tar'
 
         encver = None
+        counter = None
         if 'aes' in self.index_mode:
             encver = I2N_XXX_ENCRYPTION_VERSION
             counter = None
index e4b6910..3576879 100644 (file)
@@ -120,6 +120,13 @@ DEFAULT_FORMAT = GNU_FORMAT
 DELTATAR_HEADER_VERSION    = 1
 DELTATAR_PARAMETER_VERSION = 1
 
+GZ_FMT_HEADER        = b"<BBBBLBB"
+GZ_MAGIC             = (0x1f, 0x8b) # 0o37, 0o213
+GZ_METHOD_DEFLATE    = 0x08 # 0o10
+GZ_FLAG_ORIG_NAME    = 0x08 # 0o10, default in gzip
+GZ_DEFLATE_FLAGS     = 0x00 # 0o00, never read (deflate.c)
+GZ_OS_CODE           = 0x03 # 0o03, default in gzip (tailor.h)
+
 #---------------------------------------------------------
 # tarfile constants
 #---------------------------------------------------------
@@ -367,6 +374,32 @@ class _LowLevelFile:
         self.offset = pos
 
 
+
+def gz_header (name=None):
+    timestamp = int(time.time())
+    flags     = 0x0
+
+    if name is None:
+        name = b""
+    else:
+        flags |= GZ_FLAG_ORIG_NAME
+        if type(name) is str:
+            name = name.encode("iso-8859-1", "replace")
+        if name.endswith(b".aes128"):
+            name = name[:-7]
+        if name.endswith(b".gz"):
+            name = name[:-3]
+        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
+        name += NUL
+
+    hdr = struct.pack (GZ_FMT_HEADER,
+                       GZ_MAGIC [0], GZ_MAGIC [1],
+                       GZ_METHOD_DEFLATE, flags,
+                       timestamp,
+                       GZ_DEFLATE_FLAGS, GZ_OS_CODE)
+
+    return hdr + name
+
 class _Stream:
     """Class that serves as an adapter between TarFile and
        a stream-like object.  The stream-like object only
@@ -444,7 +477,7 @@ class _Stream:
                                     ("ctor failed crypto.Decrypt(<PASSWORD>)")
                         self.encryption = enc
                 else:
-                    if self.encver is None:
+                    if self.encver is not None:
                         # Layers are stacked differently: initialization is
                         # necessary per file.
                         if password is None:
@@ -460,7 +493,6 @@ class _Stream:
                                     ("ctor failed crypto.Encrypt(<PASSWORD>, “%s”, %r)"
                                      % (nacl, 1))
                         self.encryption = enc
-                    #self._init_write_gz()
                 self.exception = zlib.error # XXX what for? seems unused
                 self.crc = zlib.crc32(b"") & 0xFFFFffff
 
@@ -588,28 +620,6 @@ class _Stream:
             self.lasthdr = -1
 
 
-    def _init_write_gz(self):
-        """Initialize for writing with gzip compression.
-        """
-        if getattr(self, "zlib", None) is None:
-            return
-        self.cmp = self.zlib.compressobj(self.compresslevel,
-                                         self.zlib.DEFLATED,
-                                         -self.zlib.MAX_WBITS,
-                                         self.zlib.DEF_MEM_LEVEL,
-                                         0)
-        timestamp = struct.pack("<L", int(time.time()))
-        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
-        if type(self.name) is str:
-            self.name = self.name.encode("iso-8859-1", "replace")
-        if self.name.endswith(b".aes128"):
-            self.name = self.name[:-7]
-        if self.name.endswith(b".gz"):
-            self.name = self.name[:-3]
-        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
-        self.__write(self.name + NUL)
-
-
     def _finalize_write_gz (self):
         if self.cmp is not None:
             chunk = self.buf + self.cmp.flush()
@@ -624,6 +634,7 @@ class _Stream:
                     chunk += struct.pack("<L", self.crc & 0xffffffff)
                     chunk += struct.pack("<L", self.concat_pos & 0xffffFFFF)
                 self.__enc_write (chunk)
+                self.buf = b""
 
 
     def new_compression_block(self):
@@ -647,6 +658,9 @@ class _Stream:
         self.closed = False
         self.concat_pos = 0
         self.crc = self.zlib.crc32(b"") & 0xFFFFffff
+        first = False
+        if self.cmp is None:
+            first = True
         self.cmp = self.zlib.compressobj(self.compresslevel,
                                          self.zlib.DEFLATED,
                                          -self.zlib.MAX_WBITS,
@@ -659,8 +673,7 @@ class _Stream:
         elif set_last_block_offset:
             self.last_block_offset = self.fileobj.tell()
 
-        timestamp = struct.pack("<L", int(time.time()))
-        self.__write(b"\037\213\010\000" + timestamp + b"\002\377")
+        self.__write(gz_header (self.name if first is True else None))
 
 
     def write(self, s):
@@ -752,9 +765,8 @@ class _Stream:
         if self.mode == "w":
             self._finalize_write_gz ()
             self.__enc_write(self.buf)
-            if self.enc is not None:
+            if self.encryption is not None:
                 self._finalize_write_encrypt ()
-            self.buf = b""
 
         if close_fileobj and not self._extfileobj:
             self.fileobj.close()
@@ -2539,20 +2551,17 @@ class TarFile(object):
             self.last_block_offset = self.fileobj.tell()
 
         # below attributes aren’t present with other compression methods
-        init_e = getattr (self.fileobj, "_init_write_encrypt", None)
-        init_c = getattr (self.fileobj, "_init_write_gz"     , None)
+        init_e     = getattr (self.fileobj, "_init_write_encrypt",     None)
         finalize_e = getattr (self.fileobj, "_finalize_write_encrypt", None)
-        finalize_c = getattr (self.fileobj, "_finalize_write_gz"     , None)
 
         def new_item_hook (): # crypto is outer, compress is inner
             if init_e is not None: init_e (tarinfo.name)
-            if init_c is not None: init_c ()
 
         def end_item_hook (): # crypto is outer, compress is inner
-            if finalize_c is not None: finalize_c ()
             if finalize_e is not None: finalize_e ()
 
         end_item_hook () # finalize current object
+        new_item_hook ()
 
         buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
         self.fileobj.write(buf)
@@ -2582,7 +2591,6 @@ class TarFile(object):
         if target_size_left < BLOCKSIZE:
             target_size_left = BLOCKSIZE
 
-        new_item_hook ()
         # loop over multiple volumes
         while source_size_left > 0: