handle problems with incomplete gzip headers
author Philipp Gesang <philipp.gesang@intra2net.com>
Thu, 31 Aug 2017 11:37:10 +0000 (13:37 +0200)
committer Thomas Jarosch <thomas.jarosch@intra2net.com>
Mon, 2 Apr 2018 11:34:09 +0000 (13:34 +0200)
Throw the appropriate exception to signal EOF or malformed data
conditions when tentatively parsing GZip headers.

deltatar/tarfile.py

index 4987d7d..06f4524 100644 (file)
@@ -44,6 +44,7 @@ __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend, Eduardo Robl
 import binascii
 import copy
 import errno
+import functools
 import io
 import mmap
 import operator
@@ -838,8 +839,11 @@ class _Stream:
         if read2 != GZ_MAGIC_BYTES:
             raise ReadError("not a gzip file")
 
-        read1 = ord (self.__read(1))
-        if read1 != GZ_METHOD_DEFLATE:
+        read1 = self.__read(1)
+        if read1 == b"":
+            raise EndOfFile ("_init_read_gz(): read returned zero bytes inside "
+                             "gzip header at pos %d" % self.fileobj.tell())
+        if ord (read1) != GZ_METHOD_DEFLATE:
             raise CompressionError("unsupported compression method")
 
         self.flags = flag = ord(self.__read(1))
@@ -2246,11 +2250,13 @@ class TarFile(object):
     def open_at_offset(cls, offset, *a, **kwa):
         """
         Same as ``.open()``, but start reading at the given offset. Assumes a
-        seekable file object.
+        seekable file object. Returns *None* if opening failed due to a read
+        problem.
         """
         fileobj = kwa.get ("fileobj")
         if fileobj is not None:
             fileobj.seek (offset)
+
         return cls.open (*a, **kwa)
 
 
@@ -3714,15 +3720,18 @@ def read_tarobj_at_offset (fileobj, offset, mode, secret=None):
         else:
             raise RuntimeError
 
-    tarobj = \
-        TarFile.open_at_offset (offset,
-                                mode=mode,
-                                fileobj=fileobj,
-                                format=GNU_FORMAT,
-                                concat='#' in mode,
-                                encryption=decr,
-                                save_to_members=False,
-                                tolerance=TOLERANCE_RESCUE)
+    try:
+        tarobj = \
+            TarFile.open_at_offset (offset,
+                                    mode=mode,
+                                    fileobj=fileobj,
+                                    format=GNU_FORMAT,
+                                    concat='#' in mode,
+                                    encryption=decr,
+                                    save_to_members=False,
+                                    tolerance=TOLERANCE_RESCUE)
+    except (ReadError, EndOfFile):
+        return None
 
     return tarobj.next ()
 
@@ -3772,12 +3781,6 @@ def gen_rescue_index (gen_volume_name, mode, maxvol=None, password=None, key=Non
 
     nvol = 0
 
-    def aux (o, nvol, ti):
-        ie = idxent_of_tarinfo (ti)
-        ie ["offset"] = o
-        ie ["volume"] = nvol
-        return ie
-
     while True:
         vpath = gen_volume_name (nvol)
         try:
@@ -3797,11 +3800,22 @@ def gen_rescue_index (gen_volume_name, mode, maxvol=None, password=None, key=Non
                 break
 
         fileobj = bltn_open (vpath, "rb")
-        infos  += [ (off, nvol, read_tarobj_at_offset (fileobj, off, mode,
-                                                       secret=secret))
-                    for off in offsets ]
+
+        def aux (acc, off):
+            obj = read_tarobj_at_offset (fileobj, off, mode, secret=secret)
+            if obj is not None:
+                acc.append ((off, nvol, obj))
+            return acc
+        infos += functools.reduce (aux, offsets, [])
+
         nvol += 1
 
+    def aux (o, nvol, ti):
+        ie = idxent_of_tarinfo (ti)
+        ie ["offset"] = o
+        ie ["volume"] = nvol
+        return ie
+
     psidx   = [ aux (o, nvol, ti) for o, nvol, ti in infos ]
 
     return psidx