developer.intra2net.com Git - python-delta-tar/blob - deltatar/tarfile.py

   1 #!/usr/bin/env python3
   2 #-------------------------------------------------------------------
   3 # tarfile.py
   4 #-------------------------------------------------------------------
   5 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
   6 # All rights reserved.
   7 #
   8 # Permission  is  hereby granted,  free  of charge,  to  any person
   9 # obtaining a  copy of  this software  and associated documentation
  10 # files  (the  "Software"),  to   deal  in  the  Software   without
  11 # restriction,  including  without limitation  the  rights to  use,
  12 # copy, modify, merge, publish, distribute, sublicense, and/or sell
  13 # copies  of  the  Software,  and to  permit  persons  to  whom the
  14 # Software  is  furnished  to  do  so,  subject  to  the  following
  15 # conditions:
  16 #
  17 # The above copyright  notice and this  permission notice shall  be
  18 # included in all copies or substantial portions of the Software.
  19 #
  20 # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
  21 # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
  22 # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
  23 # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
  24 # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
  25 # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
  26 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  27 # OTHER DEALINGS IN THE SOFTWARE.
  28 #
  29 """Read from and write to tar format archives.
  30 """
  31
  32 __version__ = "$Revision: 85213 $"
  33 # $Source$
  34
  35 version     = "0.9.0"
  36 __author__  = "Lars Gustäbel (lars@gustaebel.de)"
  37 __date__    = "$Date$"
  38 __cvsid__   = "$Id$"
  39 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend, Eduardo Robles."
  40
  41 #---------
  42 # Imports
  43 #---------
  44 import binascii
  45 import sys
  46 import os
  47 import io
  48 import shutil
  49 import stat
  50 import errno
  51 import time
  52 import struct
  53 import copy
  54 import re
  55 import operator
  56
  57 import traceback # XXX
  58
  59 from . import crypto
  60
  61 try:
  62     import grp, pwd
  63 except ImportError:
  64     grp = pwd = None
  65
  66 # os.symlink on Windows prior to 6.0 raises NotImplementedError
  67 symlink_exception = (AttributeError, NotImplementedError)
  68 try:
  69     # OSError (winerror=1314) will be raised if the caller does not hold the
  70     # SeCreateSymbolicLinkPrivilege privilege
  71     symlink_exception += (OSError,)
  72 except NameError:
  73     pass
  74
  75 # from tarfile import *
  76 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
  77
  78 from builtins import open as _open # Since 'open' is TarFile.open
  79
  80 #---------------------------------------------------------
  81 # tar constants
  82 #---------------------------------------------------------
  83 NUL = b"\0"                     # the null character
  84 BLOCKSIZE = 512                 # length of processing blocks
  85 RECORDSIZE = BLOCKSIZE * 20     # length of records
  86 GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
  87 POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string
  88
  89 LENGTH_NAME = 100               # maximum length of a filename
  90 LENGTH_LINK = 100               # maximum length of a linkname
  91 LENGTH_PREFIX = 155             # maximum length of the prefix field
  92
  93 REGTYPE = b"0"                  # regular file
  94 AREGTYPE = b"\0"                # regular file
  95 LNKTYPE = b"1"                  # link (inside tarfile)
  96 SYMTYPE = b"2"                  # symbolic link
  97 CHRTYPE = b"3"                  # character special device
  98 BLKTYPE = b"4"                  # block special device
  99 DIRTYPE = b"5"                  # directory
 100 FIFOTYPE = b"6"                 # fifo special device
 101 CONTTYPE = b"7"                 # contiguous file
 102
 103 GNUTYPE_LONGNAME = b"L"         # GNU tar longname
 104 GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
 105 GNUTYPE_SPARSE = b"S"           # GNU tar sparse file
 106 GNUTYPE_MULTIVOL = b"M"         # GNU tar continuation of a file that began on
 107                                 # another volume
 108
 109 XHDTYPE = b"x"                  # POSIX.1-2001 extended header
 110 XGLTYPE = b"g"                  # POSIX.1-2001 global header
 111 SOLARIS_XHDTYPE = b"X"          # Solaris extended header
 112
 113 USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
 114 GNU_FORMAT = 1                  # GNU tar format
 115 PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
 116 DEFAULT_FORMAT = GNU_FORMAT
 117
 118 GZ_FMT_HEADER        = b"<BBBBLBB"
 119 GZ_HEADER_SIZE       = 10   # not including the name
 120 GZ_MAGIC             = (0x1f, 0x8b) # 0o37, 0o213
 121 GZ_METHOD_DEFLATE    = 0x08 # 0o10
 122 GZ_FLAG_ORIG_NAME    = 0x08 # 0o10, default in gzip
 123 GZ_DEFLATE_FLAGS     = 0x00 # 0o00, never read (deflate.c)
 124 GZ_OS_CODE           = 0x03 # 0o03, default in gzip (tailor.h)
 125 GZ_MAGIC_BYTES       = struct.pack ("<BB", GZ_MAGIC [0], GZ_MAGIC [1])
 126 GZ_MAGIC_DEFLATE     = struct.pack ("<BBB", GZ_MAGIC [0], GZ_MAGIC [1],
 127                                     GZ_METHOD_DEFLATE)
 128
 129 TOLERANCE_STRICT  = 0
 130 TOLERANCE_RECOVER = 1 # rely on offsets in index
 131 TOLERANCE_RESCUE  = 2 # deduce metadata from archive contents
 132
 133 #---------------------------------------------------------
 134 # archive handling mode
 135 #---------------------------------------------------------
 136
 137 ARCMODE_PLAIN    = 0
 138 ARCMODE_ENCRYPT  = 1 << 0
 139 ARCMODE_COMPRESS = 1 << 1
 140 ARCMODE_CONCAT   = 1 << 2
 141
 142 def arcmode_fmt (m):
 143     if m == ARCMODE_PLAIN:
 144         return "PLAIN"
 145     first = True
 146     ret = "["
 147     def chkappend (b, s):
 148         nonlocal m
 149         nonlocal ret
 150         nonlocal first
 151         if m & b:
 152             if first is True: first = False
 153             else: ret += " |"
 154             ret += " " + s
 155     chkappend (ARCMODE_ENCRYPT,  "ENCRYPT")
 156     chkappend (ARCMODE_COMPRESS, "COMPRESS")
 157     chkappend (ARCMODE_CONCAT,   "CONCAT")
 158     return ret + " ]"
 159
 160
 161 def arcmode_set (concat=False, encryption=None, comptype=None, init=ARCMODE_PLAIN):
 162     ret = init
 163     if bool (concat) is True:
 164         ret |= ARCMODE_CONCAT
 165     if encryption is not None:
 166         ret |= ARCMODE_ENCRYPT
 167     if comptype == "gz":
 168         ret |= ARCMODE_COMPRESS
 169     return ret
 170
 171 #---------------------------------------------------------
 172 # tarfile constants
 173 #---------------------------------------------------------
 174 # File types that tarfile supports:
 175 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
 176                    SYMTYPE, DIRTYPE, FIFOTYPE,
 177                    CONTTYPE, CHRTYPE, BLKTYPE,
 178                    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
 179                    GNUTYPE_SPARSE, GNUTYPE_MULTIVOL)
 180
 181 # File types that will be treated as a regular file.
 182 REGULAR_TYPES = (REGTYPE, AREGTYPE,
 183                  CONTTYPE, GNUTYPE_SPARSE)
 184
 185 # File types that are part of the GNU tar format.
 186 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
 187              GNUTYPE_SPARSE, GNUTYPE_MULTIVOL)
 188
 189 # Fields from a pax header that override a TarInfo attribute.
 190 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
 191               "uid", "gid", "uname", "gname")
 192
 193 # Fields from a pax header that are affected by hdrcharset.
 194 PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
 195
 196 # Fields in a pax header that are numbers, all other fields
 197 # are treated as strings.
 198 PAX_NUMBER_FIELDS = {
 199     "atime": float,
 200     "ctime": float,
 201     "mtime": float,
 202     "uid": int,
 203     "gid": int,
 204     "size": int
 205 }
 206
 207 #---------------------------------------------------------
 208 # initialization
 209 #---------------------------------------------------------
 210
 211 if os.name in ("nt", "ce"):
 212     ENCODING = "utf-8"
 213 else:
 214     ENCODING = sys.getfilesystemencoding()
 215
 216 #---------------------------------------------------------
 217 # Some useful functions
 218 #---------------------------------------------------------
 219
 220 def stn(s, length, encoding, errors):
 221     """Convert a string to a null-terminated bytes object.
 222     """
 223     s = s.encode(encoding, errors)
 224     return s[:length] + (length - len(s)) * NUL
 225
 226 def nts(s, encoding, errors):
 227     """Convert a null-terminated bytes object to a string.
 228     """
 229     p = s.find(b"\0")
 230     if p != -1:
 231         s = s[:p]
 232     return s.decode(encoding, errors)
 233
 234 def sbtn(s, length, encoding, errors):
 235     """Convert a string or a bunch of bytes to a null-terminated bytes object
 236     of specific size.
 237     """
 238     if isinstance(s, str):
 239         s = s.encode(encoding, errors)
 240     return s[:length] + (length - len(s)) * NUL
 241
 242 def nti(s):
 243     """Convert a number field to a python number.
 244     """
 245     # There are two possible encodings for a number field, see
 246     # itn() below.
 247     if s[0] in (0o200, 0o377):
 248         n = 0
 249         for i in range(len(s) - 1):
 250             n <<= 8
 251             n += s[i + 1]
 252         if s[0] == 0o377:
 253             n = -(256 ** (len(s) - 1) - n)
 254     else:
 255         try:
 256             n = int(nts(s, "ascii", "strict") or "0", 8)
 257         except ValueError:
 258             raise InvalidHeaderError("invalid header")
 259     return n
 260
 261 def itn(n, digits=8, format=DEFAULT_FORMAT):
 262     """Convert a python number to a number field.
 263     """
 264     # POSIX 1003.1-1988 requires numbers to be encoded as a string of
 265     # octal digits followed by a null-byte, this allows values up to
 266     # (8**(digits-1))-1. GNU tar allows storing numbers greater than
 267     # that if necessary. A leading 0o200 or 0o377 byte indicate this
 268     # particular encoding, the following digits-1 bytes are a big-endian
 269     # base-256 representation. This allows values up to (256**(digits-1))-1.
 270     # A 0o200 byte indicates a positive number, a 0o377 byte a negative
 271     # number.
 272     if 0 <= n < 8 ** (digits - 1):
 273         s = bytes("%0*o" % (digits - 1, int(n)), "ascii") + NUL
 274     elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
 275         if n >= 0:
 276             s = bytearray([0o200])
 277         else:
 278             s = bytearray([0o377])
 279             n = 256 ** digits + n
 280
 281         for i in range(digits - 1):
 282             s.insert(1, n & 0o377)
 283             n >>= 8
 284     else:
 285         raise ValueError("overflow in number field")
 286
 287     return s
 288
 289 def calc_chksums(buf):
 290     """Calculate the checksum for a member's header by summing up all
 291        characters except for the chksum field which is treated as if
 292        it was filled with spaces. According to the GNU tar sources,
 293        some tars (Sun and NeXT) calculate chksum with signed char,
 294        which will be different if there are chars in the buffer with
 295        the high bit set. So we calculate two checksums, unsigned and
 296        signed.
 297     """
 298     unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
 299     signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
 300     return unsigned_chksum, signed_chksum
 301
 302 def copyfileobj(src, dst, length=None):
 303     """Copy length bytes from fileobj src to fileobj dst.
 304        If length is None, copy the entire content.
 305     """
 306     if length == 0:
 307         return
 308     if length is None:
 309         shutil.copyfileobj(src, dst)
 310         return
 311
 312     BUFSIZE = 16 * 1024
 313     blocks, remainder = divmod(length, BUFSIZE)
 314     for b in range(blocks):
 315         buf = src.read(BUFSIZE)
 316         dst.write(buf)
 317         if len(buf) < BUFSIZE:
 318             raise OSError("end of file reached")
 319     if remainder != 0:
 320         buf = src.read(remainder)
 321         dst.write(buf)
 322         if len(buf) < remainder:
 323             raise OSError("end of file reached")
 324
 325
 326 def filemode(mode):
 327     """Deprecated in this location; use stat.filemode."""
 328     import warnings
 329     warnings.warn("deprecated in favor of stat.filemode",
 330                   DeprecationWarning, 2)
 331     return stat.filemode(mode)
 332
 333 class TarError(Exception):
 334     """Base exception."""
 335     pass
 336 class ExtractError(TarError):
 337     """General exception for extract errors."""
 338     pass
 339 class ReadError(TarError):
 340     """Exception for unreadable tar archives."""
 341     pass
 342 class CompressionError(TarError):
 343     """Exception for unavailable compression methods."""
 344     pass
 345 class StreamError(TarError):
 346     """Exception for unsupported operations on stream-like TarFiles."""
 347     pass
 348 class HeaderError(TarError):
 349     """Base exception for header errors."""
 350     pass
 351 class EmptyHeaderError(HeaderError):
 352     """Exception for empty headers."""
 353     pass
 354 class TruncatedHeaderError(HeaderError):
 355     """Exception for truncated headers."""
 356     pass
 357 class EOFHeaderError(HeaderError):
 358     """Exception for end of file headers."""
 359     pass
 360 class InvalidHeaderError(HeaderError):
 361     """Exception for invalid headers."""
 362     pass
 363 class SubsequentHeaderError(HeaderError):
 364     """Exception for missing and invalid extended headers."""
 365     pass
 366 class InvalidEncryptionError(TarError):
 367     """Exception for undefined crypto modes and combinations."""
 368     pass
 369 class DecryptionError(TarError):
 370     """Exception for error during decryption."""
 371     pass
 372 class EncryptionError(TarError):
 373     """Exception for error during encryption."""
 374     pass
 375 class EndOfFile(Exception):
 376     """Signal end of file condition when they’re not an error."""
 377
 378 #---------------------------
 379 # internal stream interface
 380 #---------------------------
 381 class _LowLevelFile:
 382     """Low-level file object. Supports reading and writing.
 383        It is used instead of a regular file object for streaming
 384        access.
 385     """
 386
 387     def __init__(self, name, mode):
 388         _mode = {
 389             "r": os.O_RDONLY,
 390             "w": os.O_RDWR | os.O_CREAT | os.O_TRUNC,
 391         }[mode]
 392         if hasattr(os, "O_BINARY"):
 393             _mode |= os.O_BINARY                    # pylint: disable=no-member
 394         self.fd = os.open(name, _mode, 0o666)
 395         self.offset = 0
 396
 397     def close(self):
 398         os.close(self.fd)
 399
 400     def read(self, size):
 401         ret = os.read(self.fd, size)
 402         self.offset += len(ret)
 403         return ret
 404
 405     def write(self, s, pos=None):
 406         if pos is not None:
 407             p0 = self.offset
 408             os.lseek (self.fd, pos, os.SEEK_SET)
 409         n = os.write(self.fd, s)
 410         if pos is None:
 411             self.offset += len(s)
 412         else:
 413             append = pos + n - p0
 414             if append > 0:
 415                 self.offset += append
 416             os.lseek (self.fd, p0, os.SEEK_SET)
 417
 418     def tell(self):
 419         return self.offset
 420
 421     def seek_set (self, pos):
 422         os.lseek (self.fd, pos, os.SEEK_SET)
 423         self.offset = pos
 424
 425
 426 def gz_header (name=None):
 427     timestamp = int(time.time())
 428     flags     = 0x0
 429
 430     if name is None:
 431         name = b""
 432     else:
 433         flags |= GZ_FLAG_ORIG_NAME
 434         if type(name) is str:
 435             name = name.encode("iso-8859-1", "replace")
 436         if name.endswith(b".pdtcrypt"):
 437             name = name[:-9]
 438         if name.endswith(b".gz"):
 439             name = name[:-3]
 440         # RFC1952 says we must use ISO-8859-1 for the FNAME field.
 441         name += NUL
 442
 443     hdr = struct.pack (GZ_FMT_HEADER,
 444                        GZ_MAGIC [0], GZ_MAGIC [1],
 445                        GZ_METHOD_DEFLATE, flags,
 446                        timestamp,
 447                        GZ_DEFLATE_FLAGS, GZ_OS_CODE)
 448
 449     return hdr + name
 450
 451
 452 class _Stream:
 453     """Class that serves as an adapter between TarFile and
 454        a stream-like object.  The stream-like object only
 455        needs to have a read() or write() method and is accessed
 456        blockwise.  Use of gzip or bzip2 compression is possible.
 457        A stream-like object could be for example: sys.stdin,
 458        sys.stdout, a socket, a tape device etc.
 459
       _Stream is intended to be used only internally but is
       nevertheless used externally by Deltatar.
 462
 463        When encrypting, the ``enccounter`` will be used for
 464        initializing the first cryptographic context. When
 465        decrypting, its value will be compared to the decrypted
 466        object. Decryption fails if the value does not match.
 467        In effect, this means that a ``_Stream`` whose ctor was
 468        passed ``enccounter`` can only be used to encrypt or
 469        decrypt a single object.
 470     """
 471
 472     remainder = -1 # track size in encrypted entries
 473     tolerance = TOLERANCE_STRICT
 474
    def __init__(self, name, mode, comptype, fileobj, bufsize,
                 concat=False, encryption=None, enccounter=None,
                 compresslevel=9, tolerance=TOLERANCE_STRICT):
        """Construct a _Stream object.

        name          -- file name; used to open a _LowLevelFile when no
                         fileobj is given and handed to the gzip /
                         encryption initializers
        mode          -- "r" or "w"
        comptype      -- "gz", "bz2", "xz", "tar", "" (same as "tar") or
                         "*" (type is queried from a _StreamProxy)
        fileobj       -- file object to operate on or None
        bufsize       -- block size used when buffering writes
        concat        -- truthy to set ARCMODE_CONCAT
        encryption    -- crypto context or None; not None sets
                         ARCMODE_ENCRYPT
        enccounter    -- kept only if encryption is active
        compresslevel -- level handed to the zlib compressor
        tolerance     -- one of the TOLERANCE_* values

        Raises CompressionError if the compression module is missing or
        the type is unknown and InvalidEncryptionError if encryption is
        requested together with anything but "gz" or "tar". On any error
        a file opened here is closed again and the stream marked closed.
        """
        # NOTE(review): the mode bits are derived from comptype as passed
        # in, i.e. before "*" is resolved below -- confirm this is intended.
        self.arcmode = arcmode_set (concat, encryption, comptype)
        self.tolerance = tolerance

        # remember whether the file object is owned by the caller
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()
        if comptype == '':
            comptype = "tar"

        self.enccounter = None
        if self.arcmode & ARCMODE_ENCRYPT:
            self.enccounter = enccounter

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.cmp      = None    # (de)compressor object, if any
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = b""     # data not yet handed to the file
        self.pos      = 0       # bytes passed to write() so far
        self.concat_pos = 0     # same, but reset for every gzip member
        self.closed   = False
        self.flags    = 0
        self.last_block_offset = 0
        self.dbuf     = b"" # NOTE(review): purpose not visible here
        self.exception = None # communicate decompression failure
        self.compresslevel = compresslevel
        self.bytes_written = 0  # bytes appended to the file so far
        # crypto parameters
        self.encryption = encryption
        self.lasthdr    = None  # file position of the pending crypto header

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                if mode == "r":
                    self.exception = zlib.error
                    self._init_read_gz()
                elif mode == "w":
                    # in concat mode nothing is set up here
                    if not (self.arcmode & ARCMODE_CONCAT):
                        if self.arcmode & ARCMODE_ENCRYPT:
                            self._init_write_encrypt (name)
                        self._init_write_gz ()
                self.crc = zlib.crc32(b"") & 0xFFFFffff

            elif comptype == "bz2":
                if self.arcmode & ARCMODE_ENCRYPT:
                    raise InvalidEncryptionError("encryption not available for "
                                                 "compression “%s”" % comptype)
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == 'xz':
                if self.arcmode & ARCMODE_ENCRYPT:
                    raise InvalidEncryptionError("encryption not available for "
                                                 "compression “%s”" % comptype)
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype == "tar":
                # uncompressed: only the encryption header may be due
                if not (self.arcmode & ARCMODE_CONCAT) \
                        and mode == "w" \
                        and self.arcmode & ARCMODE_ENCRYPT:
                    self._init_write_encrypt (name)

            else:
                if self.arcmode & ARCMODE_ENCRYPT:
                    raise InvalidEncryptionError("encryption not available for "
                                                 "compression “%s”" % comptype)
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            # clean up what was opened here, then let the error propagate
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise
 584
 585     def __del__(self):
 586         if hasattr(self, "closed") and not self.closed:
 587             try:
 588                 self.close()
 589             except crypto.InternalError:
 590                 # context already finalized due to abort but close() tried
 591                 # to use it
 592                 pass
 593
 594
    def next (self, name):
        """Finish the object currently being written and start a new one.

        A pending gzip member is finalized and the buffer flushed; then,
        depending on the archive mode, the encryption object is finalized
        and a new one begun for ``name`` and / or a new gzip member is
        started. Returns ``self.last_block_offset`` as left behind by
        these steps.
        """
        if self.arcmode & ARCMODE_COMPRESS:
            if getattr (self, "cmp", None) is not None:
                self._finalize_write_gz ()
        self.__sync()
        # NOTE(review): the mask selects any bit other than ENCRYPT and
        # COMPRESS (i.e. CONCAT among the flags defined in this file);
        # the value stored here is overwritten below whenever encryption
        # or compression is active -- confirm this is the intended test.
        if self.arcmode & ~(ARCMODE_ENCRYPT | ARCMODE_COMPRESS):
            self.last_block_offset = self.fileobj.tell()
        if self.arcmode & ARCMODE_ENCRYPT:
            self._finalize_write_encrypt ()
            self._init_write_encrypt (name, set_last_block_offset=True)
        if self.arcmode & ARCMODE_COMPRESS:
            # with encryption the offset was already set to the position
            # of the crypto header
            self._init_write_gz (set_last_block_offset =
                                 not (self.arcmode & ARCMODE_ENCRYPT))
        return self.last_block_offset
 609
 610
 611     def next_volume (self, name):
 612         # with non-concat modes, this is taken care by the _Stream
 613         # ctor as invoked by the newvol handler
 614         if self.arcmode & ARCMODE_COMPRESS:
 615             if getattr (self, "cmp", None) is not None:
 616                 # e. g. compressed PAX header written
 617                 self._finalize_write_gz ()
 618         if self.arcmode & ARCMODE_ENCRYPT:
 619             self._init_write_encrypt (name)
 620         if self.arcmode & ARCMODE_COMPRESS:
 621             self._init_write_gz ()
 622
 623
 624     def _init_write_encrypt (self, entry=None, set_last_block_offset=False):
 625         """
 626         Save position for delayed write of header; fill the header location
 627         with dummy bytes.
 628         """
 629         # first thing, proclaim new object to the encryption context
 630         # secondly, assemble the header with the updated parameters
 631         # and commit it directly to the underlying stream, bypassing the
 632         # encryption layer in .__write().
 633         dummyhdr = self.encryption.next (entry, counter=self.enccounter)
 634         if dummyhdr is None:
 635             raise EncryptionError ("Crypto.next(): bad dummy header") # XXX
 636         self.lasthdr = self.fileobj.tell()
 637         self.__write_to_file(dummyhdr)
 638         if set_last_block_offset is True:
 639             self.last_block_offset = self.lasthdr
 640
 641
    def _finalize_write_encrypt (self):
        """
        Seek back to header position, read dummy bytes, finalize crypto
        obtaining the actual header, write header, seek back to current
        position.

        Nothing is done when no header position was recorded
        (``self.lasthdr`` is None). The method has no return value.
        """
        if self.lasthdr is not None:
            pos0 = self.fileobj.tell ()
            # fetch the placeholder header written by _init_write_encrypt
            self.fileobj.seek_set (self.lasthdr)
            dummy = self.fileobj.read (crypto.PDTCRYPT_HDR_SIZE)
            pos1 = self.fileobj.tell ()
            dpos = pos1 - self.lasthdr
            # NOTE(review): a short read is only caught by this assert,
            # which is skipped when running with -O
            assert dpos == crypto.PDTCRYPT_HDR_SIZE
            self.fileobj.seek_set (pos0)
            data, hdr, _ = self.encryption.done (dummy)
            # overwrite the placeholder in place ...
            self.__write_to_file(hdr, pos=self.lasthdr)
            self.__write_to_file(data) # append remainder of data
            # NOTE(review): -1 is not None, so calling this method again
            # without an intervening _init_write_encrypt passes the guard
            # above and seeks to -1 -- confirm whether None was intended.
            self.lasthdr = -1
 662
 663
 664     def _finalize_write_gz (self):
 665         if self.cmp is not None:
 666             chunk = self.buf + self.cmp.flush()
 667             if chunk:
 668                 if self.comptype == "gz":
 669                     # The native zlib crc is an unsigned 32-bit integer, but
 670                     # the Python wrapper implicitly casts that to a signed C
 671                     # long.  So, on a 32-bit box self.crc may "look negative",
 672                     # while the same crc on a 64-bit box may "look positive".
 673                     # To avoid irksome warnings from the `struct` module, force
 674                     # it to look positive on all boxes.
 675                     chunk += struct.pack("<L", self.crc & 0xffffffff)
 676                     chunk += struct.pack("<L", self.concat_pos & 0xffffFFFF)
 677                 self.__enc_write (chunk)
 678                 self.buf = b""
 679
 680
 681     def _init_write_gz (self, set_last_block_offset=False):
 682         '''
 683         Add a new gzip block, closing last one
 684         '''
 685         self.concat_pos = 0
 686         self.crc = self.zlib.crc32(b"") & 0xFFFFffff
 687         first = self.cmp is None
 688         self.cmp = self.zlib.compressobj(self.compresslevel,
 689                                          self.zlib.DEFLATED,
 690                                          -self.zlib.MAX_WBITS,
 691                                          self.zlib.DEF_MEM_LEVEL,
 692                                          0)
 693
 694         # if aes, we encrypt after compression
 695         if set_last_block_offset is True:
 696             self.last_block_offset = self.fileobj.tell()
 697
 698         self.__write(gz_header (self.name if first is True else None))
 699
 700
 701     def write(self, s):
 702         """Write string s to the stream.
 703         """
 704         if self.comptype == "gz":
 705             self.crc = self.zlib.crc32(s, self.crc) & 0xFFFFffff
 706         self.pos += len(s)
 707         self.concat_pos += len(s)
 708         if self.cmp is not None:
 709             s = self.cmp.compress(s)
 710         self.__write(s)
 711
    def __sync(self):
        """Write what is left in the buffer to the stream and empty the
        buffer."""
        # An empty write makes __write() emit all complete blocks, so at
        # most bufsize bytes remain in the buffer afterwards.
        self.__write (b"") # → len (buf) <= bufsiz
        # hand the remainder to the encryption / file layer in one piece
        self.__enc_write (self.buf)
        self.buf = b""
 717
 718     def __write(self, s):
 719         """Writes (and encodes) string s to the stream blockwise
 720
 721         will wait with encoding/writing until block is complete
 722         """
 723         self.buf += s
 724         while len(self.buf) > self.bufsize:
 725             self.__enc_write(self.buf[:self.bufsize])
 726             self.buf = self.buf[self.bufsize:]
 727
 728
 729     def __write_to_file(self, s, pos=None):
 730         '''
 731         Writes directly to the fileobj; updates self.bytes_written. If “pos” is
 732         given, the stream will seek to that position first and back afterwards,
 733         and the total of bytes written is not updated.
 734         '''
 735         self.fileobj.write(s, pos)
 736         if pos is None:
 737             self.bytes_written += len(s)
 738
 739
 740     def __enc_write(self, s):
 741         """
 742         If encryption is active, the string s is encrypted before being written
 743         to the file.
 744         """
 745         if len (s) == 0:
 746             return
 747         if self.arcmode & ARCMODE_ENCRYPT:
 748             buf = s
 749             while len (buf) > 0:
 750                 n, ct = self.encryption.process(buf)
 751                 self.__write_to_file(ct)
 752                 buf = buf [n:]
 753                 if len (buf) > 0:
 754                     # The entire plaintext was not consumed: The size limit
 755                     # for encrypted objects was reached. Transparently create
 756                     # a new encrypted object and continue processing the input.
 757                     self._finalize_write_encrypt ()
 758                     self._init_write_encrypt ()
 759         else:
 760             self.__write_to_file(s)
 761
 762
 763     def estim_file_size(self):
 764         """ estimates size of file if closing it now
 765
 766         The result may differ greatly from the amount of data sent to write()
 767         due to compression, encryption and buffering.
 768
 769         In tests the result (before calling close()) was up to 12k smaller than
 770         the final file size if compression is being used because zlib/bz2
 771         compressors do not allow inspection of their buffered data :-(
 772
 773         Still, we add what close() would add: 8 bytes for gz checksum, one
 774         encryption block size if encryption is used and the size of our own
 775         buffer
 776         """
 777         if self.closed:
 778             return self.bytes_written
 779
 780         result = self.bytes_written
 781         if self.buf:
 782             result += len(self.buf)
 783         if self.comptype == 'gz':
 784             result += 8   # 2 longs = 8 byte (no extra info written for bzip2)
 785         return result
 786
    def close(self, close_fileobj=True):
        """Close the _Stream object. No operation should be
           done on it afterwards.

           With ``close_fileobj`` True (the default) pending data of a
           stream in write mode is finalized and flushed, and the file
           object is closed unless it was supplied by the caller. With any
           other value the file object is left alone; for a gzip stream in
           read mode the stored CRC is then read and compared, raising
           CompressionError on mismatch. Calling close() on a closed
           stream does nothing.
        """

        if self.closed:
            return

        if close_fileobj is True:

            if self.mode == "w":
                if self.arcmode & ARCMODE_COMPRESS:
                    self._finalize_write_gz ()
                # end of Tar archive marker (two empty blocks) was written
                # finalize encryption last; no writes may be performed after
                # this point
                self.__sync ()
                if self.arcmode & ARCMODE_ENCRYPT:
                    self._finalize_write_encrypt ()

            if not self._extfileobj:
                self.fileobj.close()
        else:
            # NOTE(review): in write mode nothing is finalized or flushed
            # on this path -- confirm callers never rely on that.
            # read the zlib crc and length and check them
            if self.mode == "r" and self.comptype == "gz":
                read_crc = self.__read(4)
                # NOTE(review): the length is consumed but never compared
                read_length = self.__read(4)
                calculated_crc = self.crc
                if struct.unpack("<L", read_crc)[0] != calculated_crc:
                    raise CompressionError("bad gzip crc")
        self.closed = True
 818
 819
 820     def _init_read_gz(self):
 821         """Initialize for reading a gzip compressed fileobj.
 822         """
 823         self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
 824
 825         read2 = self.__read(2)
 826         if read2 == b"":
 827             raise EndOfFile ("_init_read_gz(): read returned zero bytes at pos "
 828                              "%d" % self.fileobj.tell())
 829         # taken from gzip.GzipFile with some alterations
 830         if read2 != GZ_MAGIC_BYTES:
 831             raise ReadError("not a gzip file")
 832
 833         read1 = self.__read(1)
 834         if read1 != b"\010":
 835             raise CompressionError("unsupported compression method")
 836
 837         self.flags = flag = ord(self.__read(1))
 838         self.__read(6)
 839
 840         if flag & 4:
 841             xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
 842             self.read(xlen)
 843         if flag & 8:
 844             while True:
 845                 s = self.__read(1)
 846                 if not s or s == NUL:
 847                     break
 848         if flag & 16:
 849             while True:
 850                 s = self.__read(1)
 851                 if not s or s == NUL:
 852                     break
 853         if flag & 2:
 854             self.__read(2)
 855
 856     def _init_read_encrypt (self):
 857         """Initialize encryption for next entry in archive. Read a header and
 858         notify the crypto context."""
 859         if self.arcmode & ARCMODE_ENCRYPT:
 860             lasthdr = self.fileobj.tell ()
 861             try:
 862                 hdr = crypto.hdr_read_stream (self.fileobj)
 863             except crypto.EndOfFile:
 864                 return False
 865             except crypto.InvalidHeader as exn:
 866                 raise DecryptionError ("Crypto.hdr_read_stream(): error “%s” "
 867                                        "processing %r at pos %d"
 868                                        % (exn, self.fileobj, lasthdr)) \
 869                       from exn
 870             if self.enccounter is not None:
 871                 # enforce that the iv counter in the header matches an
 872                 # explicitly requested one
 873                 iv = crypto.hdr_iv_counter (hdr)
 874                 if iv != self.enccounter:
 875                     raise DecryptionError ("expected IV counter %d, got %d"
 876                                            % (self.enccounter, iv))
 877             self.lasthdr   = lasthdr
 878             self.remainder = hdr ["ctsize"] # distance to next header
 879             try:
 880                 self.encryption.next (hdr)
 881             except crypto.InvalidParameter as exn:
 882                 raise DecryptionError ("Crypto.next(): error “%s” "
 883                                        "processing %r at pos %d"
 884                                        % (exn, self.fileobj, lasthdr)) \
 885                       from exn
 886
 887         return True
 888
 889
 890     def _read_encrypt (self, buf):
 891         """
 892         Demote a program error to a decryption error in tolerant mode. This
 893         allows recovery from corrupted headers and invalid data.
 894         """
 895         try:
 896             return self.encryption.process (buf)
 897         except RuntimeError as exn:
 898             if self.tolerance != TOLERANCE_STRICT:
 899                 raise DecryptionError (exn)
 900             raise
 901
 902
 903     def _finalize_read_encrypt (self):
 904         """
 905         Finalize decryption.
 906         """
 907         if      self.arcmode & ARCMODE_ENCRYPT \
 908             and self.lasthdr is not None :
 909             assert self.remainder >= 0
 910             if self.remainder > 0:
 911                 self.remainder = 0
 912             try:
 913                 data = self.encryption.done ()
 914             except crypto.InvalidGCMTag as exn:
 915                 raise DecryptionError ("decryption failed: %s" % exn)
 916             return data
 917
 918
 919     def tell(self):
 920         """Return the stream's file pointer position.
 921         """
 922         return self.pos
 923
 924     def seek(self, pos=0):
 925         """Set the stream's file pointer to pos. Negative seeking
 926            is forbidden.
 927         """
 928         if pos - self.pos >= 0:
 929             blocks, remainder = divmod(pos - self.pos, self.bufsize)
 930             for i in range(blocks):
 931                 self.read(self.bufsize)
 932             self.read(remainder)
 933         else:
 934             raise StreamError("seeking backwards is not allowed")
 935         return self.pos
 936
 937     def read(self, size=None):
 938         """Return the next size number of bytes from the stream.
 939            If size is not defined, return all bytes of the stream
 940            up to EOF.
 941         """
 942         if size is None:
 943             t = []
 944             while True:
 945                 buf = self._read(self.bufsize)
 946                 if not buf:
 947                     break
 948                 t.append(buf)
 949             buf = b"".join(t)
 950         else:
 951             buf = self._read(size)
 952         self.pos += len(buf)
 953         return buf
 954
 955     def readline(self):
 956         """Reads just one line, new line character included
 957         """
 958         # if \n in dbuf, no read neads to be done
 959         if b'\n' in self.dbuf:
 960             pos = self.dbuf.index(b'\n') + 1
 961             ret = self.dbuf[:pos]
 962             self.dbuf = self.dbuf[pos:]
 963             return ret
 964
 965         buf = []
 966         while True:
 967             chunk = self._read(self.bufsize)
 968
 969             # nothing more to read, so return the buffer
 970             if not chunk:
 971                 return b''.join(buf)
 972
 973             buf.append(chunk)
 974
 975             # if \n found, return the new line
 976             if b'\n' in chunk:
 977                 dbuf = b''.join(buf)
 978                 pos = dbuf.index(b'\n') + 1
 979                 self.dbuf = dbuf[pos:] + self.dbuf
 980                 return dbuf[:pos]
 981
    def _read(self, size):
        """Return size bytes from the stream.

        Data left over from earlier calls (self.dbuf) is served first; more
        is pulled from the raw reader in bufsize chunks and, if a
        decompressor is installed (self.cmp), decompressed. Fewer than size
        bytes are returned when the raw reader runs dry. Surplus decoded
        data is kept in self.dbuf for the next call.

        In concat mode, data found behind the end of a compressed member
        (cmp.unused_data) is pushed back to the raw buffer, the current
        member is closed without closing the file object and the next gzip
        header is read.

        Raises ReadError if the data cannot be decompressed.
        """
        # c: number of decoded bytes gathered so far; t: the gathered chunks
        c = len(self.dbuf)
        t = [self.dbuf]

        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break

            if self.cmp is not None:
                try:
                    buf = self.cmp.decompress(buf)
                except self.exception as exn:
                    raise ReadError("invalid compressed data (%r)" % exn)
                except Exception as e:
                    # happens at the end of the file
                    # _init_read_gz failed in the previous iteration so
                    # self.cmp.decompress fails here
                    # NOTE(review): when this is swallowed in concat mode,
                    # buf still holds the raw input and is processed below
                    # as if it were decoded data -- confirm this is intended.
                    if self.arcmode & ARCMODE_CONCAT:
                        pass
                    else:
                        raise ReadError("invalid compressed data")
                # keep the running checksum of the decoded data up to date
                if self.arcmode & ARCMODE_COMPRESS and hasattr(self, "crc"):
                    self.crc = self.zlib.crc32(buf, self.crc) & 0xFFFFffff
                if self.arcmode & ARCMODE_CONCAT \
                        and len(self.cmp.unused_data) != 0:
                    # the decompressor hit the end of its member: give the
                    # surplus back to the raw buffer, wrap up this member
                    # and start the next one
                    self.buf = self.cmp.unused_data + self.buf
                    self.close(close_fileobj=False)
                    try:
                        self._init_read_gz()
                    except DecryptionError:
                        # NOTE(review): in strict mode nothing is re-raised
                        # here, and since t always holds at least self.dbuf
                        # the bare raise below looks unreachable -- confirm
                        # the intended indentation. The break also skips
                        # resetting self.closed further down.
                        if self.tolerance != TOLERANCE_STRICT:
                            # return whatever data was processed successfully
                            if len (buf) > 0:
                                t.append (buf)
                            if len (t) > 0:
                                break
                            raise
                    except EndOfFile:
                        # happens at the end of the file
                        pass
                    # fresh checksum for the next member; close() above
                    # marked the stream closed, so reopen it
                    self.crc = self.zlib.crc32(b"") & 0xFFFFffff
                    self.closed = False
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.dbuf = t[size:]
        return t[:size]
1032
1033
    def __read(self, size):
        """
        Return size bytes from stream. If internal buffer is empty, read
        another block from the stream.

        The function returns up to size bytes of data. When an error occurs
        during decryption, everything until the end of the last successfully
        finalized object is returned.

        For encrypted archives a single read never crosses the end of the
        current encrypted object (self.remainder); a new object is set up
        with _init_read_encrypt() whenever the previous one is used up.

        Raises DecryptionError in strict mode, or in tolerant mode when no
        data has been validated yet.
        """
        # c: bytes gathered so far; t: gathered chunks, starting with what is
        # left in self.buf
        c = len(self.buf)
        t = [self.buf] if c > 0 else []
        # number of leading entries of t that are safe to return; buffered
        # data from earlier calls counts as good
        good_crypto = len (t)

        while c < size:
            todo = size
            try:
                if self.arcmode & ARCMODE_ENCRYPT:
                    if self.remainder <= 0:
                        # prepare next object
                        if self._init_read_encrypt () is False: # EOF
                            buf = None
                            break # while

                    # only read up to the end of the encrypted object
                    todo = min (size, self.remainder)
                buf = self.fileobj.read(todo)
                if self.arcmode & ARCMODE_ENCRYPT:
                    # decrypt the thing
                    buf = self._read_encrypt (buf)
                    if todo == self.remainder:
                        # at the end of a crypto object; finalization will fail if
                        # the GCM tag does not match
                        trailing = self._finalize_read_encrypt ()
                        # the chunk appended below completes a validated
                        # object, so include it in the good part
                        good_crypto = len (t) + 1
                        if len (trailing) > 0:
                            buf += trailing
                        self.remainder = 0
                    else:
                        # NOTE(review): decrements by the requested amount,
                        # not by the number of bytes actually read.
                        self.remainder -= todo
            except DecryptionError:
                if self.tolerance == TOLERANCE_STRICT:
                    raise
                self.encryption.drop ()
                if good_crypto == 0:
                    raise
                # this may occur at any of the three crypto operations above.
                # some objects did validate; discard all data after it; next
                # call will start with the bad object and error out immediately
                # NOTE(review): chunks past the validated part are kept in
                # self.buf rather than dropped, and the returned data is not
                # cut to size -- confirm against callers.
                self.buf = b"".join (t [good_crypto:])
                return b"".join (t [:good_crypto])

            if not buf: ## XXX stream terminated prematurely; this should be an error
                break

            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        # keep the surplus for the next call
        self.buf = t[size:]

        return t[:size]
1094
1095
class _StreamProxy(object):
    """Thin wrapper used for transparent compression detection on the
       Stream interface (mode 'r|*').

       The first block of the file object is read up front so that its
       magic bytes can be inspected; the first call to read() replays that
       block and then lets all further reads go straight to the file object.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        # peek at the first block; it is handed out again by read()
        self.buf = fileobj.read(BLOCKSIZE)

    def read(self, size):                       # pylint: disable=method-hidden
        """Return the peeked block once; size is ignored for that call.
           Afterwards the instance attribute shadows this method.
        """
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Guess the compression type ("gz", "bz2", "xz" or "tar") from the
           peeked block.
        """
        head = self.buf
        if head.startswith(GZ_MAGIC_DEFLATE):
            return "gz"
        if head[0:3] == b"BZh" and head[4:10] == b"1AY&SY":
            return "bz2"
        if head.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class StreamProxy
1122
1123 #------------------------
1124 # Extraction file object
1125 #------------------------
class _FileInFile(object):
    """Read-only, file-like window onto a region of another file object.

       The window presents `size` bytes whose stored data begins at `offset`
       in the underlying file. `blockinfo` is an optional list of
       (offset, size) pairs naming the ranges of the window that are backed
       by stored data; everything in between reads as NUL bytes.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            # no sparse information: one data block covering everything
            blockinfo = [(0, size)]

        # Build the list of (is_data, start, stop, real_offset) entries
        # describing data blocks and the zero-filled gaps between them.
        self.map_index = 0
        self.map = []
        cursor = 0
        realpos = self.offset
        for blk_start, blk_len in blockinfo:
            blk_stop = blk_start + blk_len
            if blk_start > cursor:
                self.map.append((False, cursor, blk_start, None))
            self.map.append((True, blk_start, blk_stop, realpos))
            realpos += blk_len
            cursor = blk_stop
        if cursor < self.size:
            self.map.append((False, cursor, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Report the current position inside the window.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Move to a new position inside the window and return it.
           The result is clamped to the window; an unknown whence raises
           ValueError.
        """
        if whence == io.SEEK_SET:
            target = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            moved = self.position + position
            if position < 0:
                target = max(moved, 0)
            else:
                target = min(moved, self.size)
        elif whence == io.SEEK_END:
            target = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        self.position = target
        return target

    def read(self, size=None):
        """Read up to size bytes (everything left if size is None) starting
           at the current position.
        """
        available = self.size - self.position
        wanted = available if size is None else min(size, available)

        pieces = []
        while wanted > 0:
            # find the map entry covering the current position, wrapping
            # around at the end of the map
            while True:
                is_data, start, stop, realpos = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                self.map_index += 1
                if self.map_index == len(self.map):
                    self.map_index = 0

            length = min(wanted, stop - self.position)
            if is_data:
                self.fileobj.seek(realpos + (self.position - start))
                pieces.append(self.fileobj.read(length))
            else:
                pieces.append(NUL * length)
            wanted -= length
            self.position += length
        return b"".join(pieces)

    def readinto(self, b):
        """Fill the writable buffer b and return the number of bytes stored.
        """
        data = self.read(len(b))
        count = len(data)
        b[:count] = data
        return count

    def close(self):
        self.closed = True
#class _FileInFile
1226
1227
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of a single archive member."""

    def __init__(self, tarfile, tarinfo):
        # raw, unbuffered view of the member's data inside the archive file
        raw = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(raw)
#class ExFileObject
1235
1236 #------------------
1237 # Exported Classes
1238 #------------------
1239 class TarInfo(object):
1240     """Informational class which holds the details about an
1241        archive member given by a tar header block.
1242        TarInfo objects are returned by TarFile.getmember(),
1243        TarFile.getmembers() and TarFile.gettarinfo() and are
1244        usually created internally.
1245     """
1246
    # Fixed set of instance attributes. Note that "tarfile",
    # "_sparse_structs" and "_link_target" are only declared here; __init__
    # does not assign them, so they stay unset until other code does.
    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor", "volume_offset",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")
1252
1253     def __init__(self, name=""):
1254         """Construct a TarInfo object. name is the optional name
1255            of the member.
1256         """
1257         self.name = name        # member name
1258         self.mode = 0o644       # file permissions
1259         self.uid = 0            # user id
1260         self.gid = 0            # group id
1261         self.size = 0           # file size
1262         self.mtime = 0          # modification time
1263         self.chksum = 0         # header checksum
1264         self.type = REGTYPE     # member type
1265         self.linkname = ""      # link name
1266         self.uname = ""         # user name
1267         self.gname = ""         # group name
1268         self.devmajor = 0       # device major number
1269         self.devminor = 0       # device minor number
1270
1271         self.offset = 0         # the tar header starts here
1272         self.offset_data = 0    # the file's data starts here
1273         self.volume_offset = 0  # the file's data corresponds with the data
1274                                 # starting at this position
1275
1276         self.sparse = None      # sparse member information
1277         self.pax_headers = {}   # pax header information
1278
1279     # In pax headers the "name" and "linkname" field are called
1280     # "path" and "linkpath".
1281     def _getpath(self):
1282         return self.name
1283     def _setpath(self, name):
1284         self.name = name
1285     path = property(_getpath, _setpath)
1286
1287     def _getlinkpath(self):
1288         return self.linkname
1289     def _setlinkpath(self, linkname):
1290         self.linkname = linkname
1291     linkpath = property(_getlinkpath, _setlinkpath)
1292
1293     def __repr__(self):
1294         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
1295
1296     def get_info(self, encoding=None, errors=None):
1297         """Return the TarInfo's attributes as a dictionary.
1298         """
1299         info = {
1300             "name":     self.name,
1301             "mode":     self.mode & 0o7777,
1302             "uid":      self.uid,
1303             "gid":      self.gid,
1304             "size":     self.size,
1305             "mtime":    self.mtime,
1306             "chksum":   self.chksum,
1307             "type":     self.type,
1308             "linkname": self.linkname,
1309             "uname":    self.uname,
1310             "gname":    self.gname,
1311             "devmajor": self.devmajor,
1312             "devminor": self.devminor,
1313             "offset_data": self.offset_data,
1314             "volume_offset": self.volume_offset
1315         }
1316
1317         if info["type"] == DIRTYPE and not info["name"].endswith("/"):
1318             info["name"] += "/"
1319
1320         return info
1321
1322     def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING,
1323               errors="surrogateescape"):
1324         """Return a tar header as a string of 512 byte blocks.
1325         """
1326         info = self.get_info(encoding, errors)
1327
1328         if format == USTAR_FORMAT:
1329             return self.create_ustar_header(info, encoding, errors)
1330         elif format == GNU_FORMAT:
1331             return self.create_gnu_header(info, encoding, errors)
1332         elif format == PAX_FORMAT:
1333             return self.create_pax_header(info, encoding, errors)
1334         else:
1335             raise ValueError("invalid format")
1336
1337     def create_ustar_header(self, info, encoding, errors):
1338         """Return the object as a ustar header block.
1339         """
1340         info["magic"] = POSIX_MAGIC
1341
1342         if len(info["linkname"]) > LENGTH_LINK:
1343             raise ValueError("linkname is too long")
1344
1345         if len(info["name"]) > LENGTH_NAME:
1346             info["prefix"], info["name"] = self._posix_split_name(info["name"])
1347
1348         return self._create_header(info, USTAR_FORMAT, encoding, errors)
1349
1350     def create_gnu_header(self, info, encoding, errors):
1351         """Return the object as a GNU header block sequence.
1352         """
1353         info["magic"] = GNU_MAGIC
1354
1355         if self.ismultivol():
1356             prefix = [
1357                 itn(info.get("atime", 0), 12, GNU_FORMAT),
1358                 itn(info.get("ctime", 0), 12, GNU_FORMAT),
1359                 itn(self.volume_offset, 12, GNU_FORMAT),
1360                 itn(0, 119, GNU_FORMAT), # stuff unused in this tar implementation, set to zero
1361             ]
1362             info['prefix'] = b"".join(prefix)
1363             info['size'] = info['size'] - self.volume_offset
1364
1365         buf = b""
1366         if len(info["linkname"]) > LENGTH_LINK:
1367             buf += self._create_gnu_long_header(info["linkname"],
1368                 GNUTYPE_LONGLINK, encoding, errors)
1369
1370         if len(info["name"]) > LENGTH_NAME:
1371             buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME,
1372                                                 encoding, errors)
1373
1374         return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
1375
1376     def create_pax_header(self, info, encoding, errors):
1377         """Return the object as a ustar header block. If it cannot be
1378            represented this way, prepend a pax extended header sequence
1379            with supplement information.
1380         """
1381         info["magic"] = POSIX_MAGIC
1382         pax_headers = self.pax_headers.copy()
1383         if self.ismultivol():
1384             info['size'] = info['size'] - self.volume_offset
1385
1386         # Test string fields for values that exceed the field length or cannot
1387         # be represented in ASCII encoding.
1388         for name, hname, length in (
1389                 ("name", "path", LENGTH_NAME),
1390                 ("linkname", "linkpath", LENGTH_LINK),
1391                 ("uname", "uname", 32),
1392                 ("gname", "gname", 32)):
1393
1394             if hname in pax_headers:
1395                 # The pax header has priority.
1396                 continue
1397
1398             # Try to encode the string as ASCII.
1399             try:
1400                 info[name].encode("ascii", "strict")
1401             except UnicodeEncodeError:
1402                 pax_headers[hname] = info[name]
1403                 continue
1404
1405             if len(info[name]) > length:
1406                 pax_headers[hname] = info[name]
1407
1408         # Test number fields for values that exceed the field limit or values
1409         # that like to be stored as float.
1410         for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1411             if name in pax_headers:
1412                 # The pax header has priority. Avoid overflow.
1413                 info[name] = 0
1414                 continue
1415
1416             val = info[name]
1417             if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1418                 pax_headers[name] = str(val)
1419                 info[name] = 0
1420
1421         # Create a pax extended header if necessary.
1422         if pax_headers:
1423             buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
1424         else:
1425             buf = b""
1426
1427         return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
1428
1429     @classmethod
1430     def create_pax_global_header(cls, pax_headers):
1431         """Return the object as a pax global header block sequence.
1432         """
1433         return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
1434
1435     def _posix_split_name(self, name):
1436         """Split a name longer than 100 chars into a prefix
1437            and a name part.
1438         """
1439         prefix = name[:LENGTH_PREFIX + 1]
1440         while prefix and prefix[-1] != "/":
1441             prefix = prefix[:-1]
1442
1443         name = name[len(prefix):]
1444         prefix = prefix[:-1]
1445
1446         if not prefix or len(name) > LENGTH_NAME:
1447             raise ValueError("name is too long")
1448         return prefix, name
1449
1450     @staticmethod
1451     def _create_header(info, format, encoding, errors):
1452         """Return a header block. info is a dictionary with file
1453            information, format must be one of the *_FORMAT constants.
1454         """
1455         parts = [
1456             stn(info.get("name", ""), 100, encoding, errors),
1457             itn(info.get("mode", 0) & 0o7777, 8, format),
1458             itn(info.get("uid", 0), 8, format),
1459             itn(info.get("gid", 0), 8, format),
1460             itn(info.get("size", 0), 12, format),
1461             itn(info.get("mtime", 0), 12, format),
1462             b"        ", # checksum field
1463             info.get("type", REGTYPE),
1464             stn(info.get("linkname", ""), 100, encoding, errors),
1465             info.get("magic", POSIX_MAGIC),
1466             stn(info.get("uname", ""), 32, encoding, errors),
1467             stn(info.get("gname", ""), 32, encoding, errors),
1468             itn(info.get("devmajor", 0), 8, format),
1469             itn(info.get("devminor", 0), 8, format),
1470             sbtn(info.get("prefix", ""), 155, encoding, errors)
1471         ]
1472
1473         buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
1474         chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1475         buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
1476         return buf
1477
1478     @staticmethod
1479     def _create_payload(payload):
1480         """Return the string payload filled with zero bytes
1481            up to the next 512 byte border.
1482         """
1483         blocks, remainder = divmod(len(payload), BLOCKSIZE)
1484         if remainder > 0:
1485             payload += (BLOCKSIZE - remainder) * NUL
1486         return payload
1487
1488     @classmethod
1489     def _create_gnu_long_header(cls, name, type, encoding, errors):
1490         """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1491            for name.
1492         """
1493         name = name.encode(encoding, errors) + NUL
1494
1495         info = {}
1496         info["name"] = "././@LongLink"
1497         info["type"] = type
1498         info["size"] = len(name)
1499         info["magic"] = GNU_MAGIC
1500
1501         # create extended header + name blocks.
1502         return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
1503                 cls._create_payload(name)
1504
1505     @classmethod
1506     def _create_pax_generic_header(cls, pax_headers, type, encoding):
1507         """Return a POSIX.1-2008 extended or global header sequence
1508            that contains a list of keyword, value pairs. The values
1509            must be strings.
1510         """
1511         # Check if one of the fields contains surrogate characters and thereby
1512         # forces hdrcharset=BINARY, see _proc_pax() for more information.
1513         binary = False
1514         for keyword, value in pax_headers.items():
1515             try:
1516                 value.encode("utf-8", "strict")
1517             except UnicodeEncodeError:
1518                 binary = True
1519                 break
1520
1521         records = b""
1522         if binary:
1523             # Put the hdrcharset field at the beginning of the header.
1524             records += b"21 hdrcharset=BINARY\n"
1525
1526         for keyword, value in pax_headers.items():
1527             keyword = keyword.encode("utf-8")
1528             if binary:
1529                 # Try to restore the original byte representation of `value'.
1530                 # Needless to say, that the encoding must match the string.
1531                 value = value.encode(encoding, "surrogateescape")
1532             else:
1533                 value = value.encode("utf-8")
1534
1535             l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
1536             n = p = 0
1537             while True:
1538                 n = l + len(str(p))
1539                 if n == p:
1540                     break
1541                 p = n
1542             records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
1543
1544         # We use a hardcoded "././@PaxHeader" name like star does
1545         # instead of the one that POSIX recommends.
1546         info = {}
1547         info["name"] = "././@PaxHeader"
1548         info["type"] = type
1549         info["size"] = len(records)
1550         info["magic"] = POSIX_MAGIC
1551
1552         # Create pax header + record blocks.
1553         return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
1554                 cls._create_payload(records)
1555
    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

           buf must be exactly one header block (BLOCKSIZE bytes); encoding
           and errors are used to decode the string fields.

           Raises EmptyHeaderError for an empty buffer, TruncatedHeaderError
           for a buffer of the wrong length, EOFHeaderError for a block of
           only NUL bytes and InvalidHeaderError if the stored checksum
           matches none of the computed ones.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        # validate the header before trusting any of its fields
        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        # decode the fixed-position fields
        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                # each structure is a 12 byte offset plus a 12 byte length;
                # stop at the first one that does not parse
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        else:
            # NOTE(review): bytes 369:381 are where create_gnu_header()
            # stores volume_offset for multivolume members (345 + 2 * 12),
            # yet the value lands in offset_data here -- confirm that this
            # is intended and corrected by later processing.
            obj.offset_data = nti(buf[369:381])
        return obj
1620
1621     @classmethod
1622     def fromtarfile(cls, tarfile):
1623         """Return the next TarInfo object from TarFile object
1624            tarfile.
1625         """
1626         buf = tarfile.fileobj.read(BLOCKSIZE)
1627         obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1628         obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1629         return obj._proc_member(tarfile)
1630
1631     #--------------------------------------------------------------------------
1632     # The following are methods that are called depending on the type of a
1633     # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following operations:
1637     # 1. Set self.offset_data to the position where the data blocks begin,
1638     #    if there is data that follows.
1639     # 2. Set tarfile.offset to the position where the next member's header will
1640     #    begin.
1641     # 3. Return self or another valid TarInfo object.
1642     def _proc_member(self, tarfile):
1643         """Choose the right processing method depending on
1644            the type and call it.
1645         """
1646         if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1647             return self._proc_gnulong(tarfile)
1648         elif self.type == GNUTYPE_SPARSE:
1649             return self._proc_sparse(tarfile)
1650         elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1651             return self._proc_pax(tarfile)
1652         else:
1653             return self._proc_builtin(tarfile)
1654
1655     def _proc_builtin(self, tarfile):
1656         """Process a builtin type or an unknown type which
1657            will be treated as a regular file.
1658         """
1659         self.offset_data = tarfile.fileobj.tell()
1660         offset = self.offset_data
1661         if self.isreg() or self.ismultivol() or self.type not in SUPPORTED_TYPES:
1662             # Skip the following data blocks.
1663             offset += self._block(self.size)
1664         tarfile.offset = offset
1665
1666         # Patch the TarInfo object with saved global
1667         # header information.
1668         self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1669
1670         return self
1671
1672     def _proc_gnulong(self, tarfile):
1673         """Process the blocks that hold a GNU longname
1674            or longlink member.
1675         """
1676         buf = tarfile.fileobj.read(self._block(self.size))
1677
1678         # Fetch the next header and process it.
1679         try:
1680             next = self.fromtarfile(tarfile)
1681         except HeaderError:
1682             raise SubsequentHeaderError("missing or bad subsequent header")
1683
1684         # Patch the TarInfo object from the next header with
1685         # the longname information.
1686         next.offset = self.offset
1687         if self.type == GNUTYPE_LONGNAME:
1688             next.name = nts(buf, tarfile.encoding, tarfile.errors)
1689         elif self.type == GNUTYPE_LONGLINK:
1690             next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
1691
1692         return next
1693
1694     def _proc_sparse(self, tarfile):
1695         """Process a GNU sparse header plus extra headers.
1696         """
1697         # We already collected some sparse structures in frombuf().
1698         structs, isextended, origsize = self._sparse_structs
1699         del self._sparse_structs
1700
1701         # Collect sparse structures from extended header blocks.
1702         while isextended:
1703             buf = tarfile.fileobj.read(BLOCKSIZE)
1704             pos = 0
1705             for i in range(21):
1706                 try:
1707                     offset = nti(buf[pos:pos + 12])
1708                     numbytes = nti(buf[pos + 12:pos + 24])
1709                 except ValueError:
1710                     break
1711                 if offset and numbytes:
1712                     structs.append((offset, numbytes))
1713                 pos += 24
1714             isextended = bool(buf[504])
1715         self.sparse = structs
1716
1717         self.offset_data = tarfile.fileobj.tell()
1718         tarfile.offset = self.offset_data + self._block(self.size)
1719         self.size = origsize
1720         return self
1721
1722     def _proc_pax(self, tarfile):
1723         """Process an extended or global header as described in
1724            POSIX.1-2008.
1725         """
1726         # Read the header information.
1727         buf = tarfile.fileobj.read(self._block(self.size))
1728
1729         # A pax header stores supplemental information for either
1730         # the following file (extended) or all following files
1731         # (global).
1732         if self.type == XGLTYPE:
1733             pax_headers = tarfile.pax_headers
1734         else:
1735             pax_headers = tarfile.pax_headers.copy()
1736
1737         # Check if the pax header contains a hdrcharset field. This tells us
1738         # the encoding of the path, linkpath, uname and gname fields. Normally,
1739         # these fields are UTF-8 encoded but since POSIX.1-2008 tar
1740         # implementations are allowed to store them as raw binary strings if
1741         # the translation to UTF-8 fails.
1742         match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
1743         if match is not None:
1744             pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
1745
1746         # For the time being, we don't care about anything other than "BINARY".
1747         # The only other value that is currently allowed by the standard is
1748         # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
1749         hdrcharset = pax_headers.get("hdrcharset")
1750         if hdrcharset == "BINARY":
1751             encoding = tarfile.encoding
1752         else:
1753             encoding = "utf-8"
1754
1755         # Parse pax header information. A record looks like that:
1756         # "%d %s=%s\n" % (length, keyword, value). length is the size
1757         # of the complete record including the length field itself and
1758         # the newline. keyword and value are both UTF-8 encoded strings.
1759         regex = re.compile(br"(\d+) ([^=]+)=")
1760         pos = 0
1761         while True:
1762             match = regex.match(buf, pos)
1763             if not match:
1764                 break
1765
1766             length, keyword = match.groups()
1767             length = int(length)
1768             value = buf[match.end(2) + 1:match.start(1) + length - 1]
1769
1770             # Normally, we could just use "utf-8" as the encoding and "strict"
1771             # as the error handler, but we better not take the risk. For
1772             # example, GNU tar <= 1.23 is known to store filenames it cannot
1773             # translate to UTF-8 as raw strings (unfortunately without a
1774             # hdrcharset=BINARY header).
1775             # We first try the strict standard encoding, and if that fails we
1776             # fall back on the user's encoding and error handler.
1777             keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
1778                     tarfile.errors)
1779             if keyword in PAX_NAME_FIELDS:
1780                 value = self._decode_pax_field(value, encoding, tarfile.encoding,
1781                         tarfile.errors)
1782             else:
1783                 value = self._decode_pax_field(value, "utf-8", "utf-8",
1784                         tarfile.errors)
1785
1786             pax_headers[keyword] = value
1787             pos += length
1788
1789
1790         # Fetch the next header.
1791         try:
1792             next = self.fromtarfile(tarfile)
1793         except HeaderError:
1794             raise SubsequentHeaderError("missing or bad subsequent header")
1795
1796         # Process GNU sparse information.
1797         if "GNU.sparse.map" in pax_headers:
1798             # GNU extended sparse format version 0.1.
1799             self._proc_gnusparse_01(next, pax_headers)
1800
1801         elif "GNU.sparse.size" in pax_headers:
1802             # GNU extended sparse format version 0.0.
1803             self._proc_gnusparse_00(next, pax_headers, buf)
1804
1805         elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1806             # GNU extended sparse format version 1.0.
1807             self._proc_gnusparse_10(next, pax_headers, tarfile)
1808
1809         if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1810             # Patch the TarInfo object with the extended header info.
1811             next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1812             next.offset = self.offset
1813
1814             if "size" in pax_headers:
1815                 # If the extended header replaces the size field,
1816                 # we need to recalculate the offset where the next
1817                 # header starts.
1818                 offset = next.offset_data
1819                 if next.isreg() or next.type not in SUPPORTED_TYPES:
1820                     offset += next._block(next.size)
1821                 tarfile.offset = offset
1822
1823         if next is not None:
1824             if "GNU.volume.filename" in pax_headers:
1825                 if pax_headers["GNU.volume.filename"] == next.name:
1826                     if "GNU.volume.size" in pax_headers:
1827                         next.size = int(pax_headers["GNU.volume.size"])
1828                     if "GNU.volume.offset" in pax_headers:
1829                         next.volume_offset = int(pax_headers["GNU.volume.offset"])
1830
1831                 for key in pax_headers.keys():
1832                     if key.startswith("GNU.volume"):
1833                         del tarfile.pax_headers[key]
1834
1835         return next
1836
1837     def _proc_gnusparse_00(self, next, pax_headers, buf):
1838         """Process a GNU tar extended sparse header, version 0.0.
1839         """
1840         offsets = []
1841         for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1842             offsets.append(int(match.group(1)))
1843         numbytes = []
1844         for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1845             numbytes.append(int(match.group(1)))
1846         next.sparse = list(zip(offsets, numbytes))
1847
1848     def _proc_gnusparse_01(self, next, pax_headers):
1849         """Process a GNU tar extended sparse header, version 0.1.
1850         """
1851         sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1852         next.sparse = list(zip(sparse[::2], sparse[1::2]))
1853
1854     def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1855         """Process a GNU tar extended sparse header, version 1.0.
1856         """
1857         fields = None
1858         sparse = []
1859         buf = tarfile.fileobj.read(BLOCKSIZE)
1860         fields, buf = buf.split(b"\n", 1)
1861         fields = int(fields)
1862         while len(sparse) < fields * 2:
1863             if b"\n" not in buf:
1864                 buf += tarfile.fileobj.read(BLOCKSIZE)
1865             number, buf = buf.split(b"\n", 1)
1866             sparse.append(int(number))
1867         next.offset_data = tarfile.fileobj.tell()
1868         next.sparse = list(zip(sparse[::2], sparse[1::2]))
1869
1870     def _apply_pax_info(self, pax_headers, encoding, errors):
1871         """Replace fields with supplemental information from a previous
1872            pax extended or global header.
1873         """
1874         for keyword, value in pax_headers.items():
1875             if keyword == "GNU.sparse.name":
1876                 setattr(self, "path", value)
1877             elif keyword == "GNU.sparse.size":
1878                 setattr(self, "size", int(value))
1879             elif keyword == "GNU.sparse.realsize":
1880                 setattr(self, "size", int(value))
1881             elif keyword in PAX_FIELDS:
1882                 if keyword in PAX_NUMBER_FIELDS:
1883                     try:
1884                         value = PAX_NUMBER_FIELDS[keyword](value)
1885                     except ValueError:
1886                         value = 0
1887                 if keyword == "path":
1888                     value = value.rstrip("/")       # pylint: disable=no-member
1889                 setattr(self, keyword, value)
1890
1891         self.pax_headers = pax_headers.copy()
1892
1893     def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1894         """Decode a single field from a pax record.
1895         """
1896         try:
1897             return value.decode(encoding, "strict")
1898         except UnicodeDecodeError:
1899             return value.decode(fallback_encoding, fallback_errors)
1900
1901     def _block(self, count):
1902         """Round up a byte count by BLOCKSIZE and return it,
1903            e.g. _block(834) => 1024.
1904         """
1905         blocks, remainder = divmod(count, BLOCKSIZE)
1906         if remainder:
1907             blocks += 1
1908         return blocks * BLOCKSIZE
1909
1910     def isreg(self):
1911         return self.type in REGULAR_TYPES
1912     def isfile(self):
1913         return self.isreg()
1914     def isdir(self):
1915         return self.type == DIRTYPE
1916     def issym(self):
1917         return self.type == SYMTYPE
1918     def islnk(self):
1919         return self.type == LNKTYPE
1920     def ischr(self):
1921         return self.type == CHRTYPE
1922     def isblk(self):
1923         return self.type == BLKTYPE
1924     def isfifo(self):
1925         return self.type == FIFOTYPE
1926     def issparse(self):
1927         return self.sparse is not None
1928     def isdev(self):
1929         return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1930     def ismultivol(self):
1931         return self.type == GNUTYPE_MULTIVOL or self.volume_offset > 0 or\
1932             "GNU.volume.offset" in self.pax_headers
1933 # class TarInfo
1934
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.

       The class attributes below serve as defaults; __init__() replaces
       most of them on the instance when the matching argument is given.
    """

    # Debug level; may be set from 0 (no msgs) to 3 (all msgs).
    debug = 0

    # If true, add the content of a linked file to the tar file, else
    # the link itself.
    dereference = False

    # If true, skip empty or invalid blocks and continue processing.
    ignore_zeros = False

    # If different from None, establishes the maximum size of tar volumes.
    max_volume_size = None

    # Function handler to be executed when a new volume is needed.
    new_volume_handler = None

    # Current volume number, used for multi volume support.
    volume_number = 0

    # If 0, fatal errors only appear in debug messages (if debug >= 0).
    # If > 0, errors are passed to the caller as exceptions.
    errorlevel = 1

    # The format to use when creating an archive.
    format = DEFAULT_FORMAT

    # Encoding for 8-bit character strings.
    encoding = ENCODING

    # Error handler for unicode conversion.
    errors = None

    # The default TarInfo class to use.
    tarinfo = TarInfo

    # The file-object for extractfile().
    fileobject = ExFileObject

    # Object processing mode ("concat", encryption, compression).
    arcmode = ARCMODE_PLAIN

    # Whether new members are saved. This can be disabled if you manage
    # lots of files and don't want to have high memory usage.
    save_to_members = True

    # Cache to avoid getpwuid calls, which always parse /etc/passwd.
    # Being a class attribute, the dictionary is shared by all instances
    # unless one of them rebinds the name.
    cache_uid2user = {}
    # Same kind of cache for groups.
    cache_gid2group = {}
1979
1980     def __init__(self, name=None, mode="r", fileobj=None, format=None,
1981             tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1982             errors="surrogateescape", pax_headers=None, debug=None,
1983             errorlevel=None, max_volume_size=None, new_volume_handler=None,
1984             concat=False, nacl=None,
1985             save_to_members=True):
1986         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1987            read from an existing archive, 'a' to append data to an existing
1988            file or 'w' to create a new file overwriting an existing one. `mode'
1989            defaults to 'r'.
1990            If `fileobj' is given, it is used for reading or writing data. If it
1991            can be determined, `mode' is overridden by `fileobj's mode.
1992            `fileobj' is not closed, when TarFile is closed.
1993         """
1994         if len(mode) > 1 or mode not in "raw":
1995             raise ValueError("mode must be 'r', 'a' or 'w'")
1996         self.mode = mode
1997         self.arcmode = arcmode_set (concat)
1998         self.nacl = nacl
1999         self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
2000
2001         if not fileobj:
2002             if self.mode == "a" and not os.path.exists(name):
2003                 # Create nonexistent files in append mode.
2004                 self.mode = "w"
2005                 self._mode = "wb"
2006             fileobj = bltn_open(name, self._mode)
2007             self._extfileobj = False
2008         else:
2009             if name is None and hasattr(fileobj, "name"):
2010                 name = fileobj.name
2011             # when fileobj is a gzip.GzipFile, fileobj.mode is an int (not valid for us)
2012             if hasattr(fileobj, "mode") and isinstance(fileobj.mode, str):
2013                 self._mode = fileobj.mode
2014             self._extfileobj = True
2015         self.name = os.path.abspath(name) if name else None
2016         self.base_name = self.name = os.path.abspath(name) if name else None
2017         self.fileobj = fileobj
2018
2019         # Init attributes.
2020         if format is not None:
2021             self.format = format
2022         if tarinfo is not None:
2023             self.tarinfo = tarinfo
2024         if dereference is not None:
2025             self.dereference = dereference
2026         if ignore_zeros is not None:
2027             self.ignore_zeros = ignore_zeros
2028         if encoding is not None:
2029             self.encoding = encoding
2030
2031         self.errors = errors
2032
2033         if pax_headers is not None and self.format == PAX_FORMAT:
2034             self.pax_headers = pax_headers
2035         else:
2036             self.pax_headers = {}
2037
2038         if debug is not None:
2039             self.debug = debug
2040         if errorlevel is not None:
2041             self.errorlevel = errorlevel
2042
2043         # Init datastructures.
2044         if max_volume_size and max_volume_size < 3*BLOCKSIZE:
2045             raise ValueError("max_volume_size needs to be at least %d" % (3*BLOCKSIZE))
2046         if max_volume_size and not callable(new_volume_handler):
2047             raise ValueError("new_volume_handler needs to be set and be callable for multivolume support")
2048         if max_volume_size:
2049             self.max_volume_size = int(max_volume_size)
2050         else:
2051             self.max_volume_size = None
2052
2053         self.save_to_members = save_to_members
2054         self.new_volume_handler = new_volume_handler
2055         self.closed = False
2056         self.members = []       # list of members as TarInfo objects
2057         self._loaded = False    # flag if all members have been read
2058         self.offset = self.fileobj.tell()
2059                                 # current position in the archive file
2060         self.inodes = {}        # dictionary caching the inodes of
2061                                 # archive members already added
2062
2063         try:
2064             if self.mode == "r":
2065                 self.firstmember = None
2066                 self.firstmember = self.next()
2067
2068             if self.mode == "a":
2069                 # Move to the end of the archive,
2070                 # before the first empty block.
2071                 while True:
2072                     self.fileobj.seek(self.offset)
2073                     try:
2074                         tarinfo = self.tarinfo.fromtarfile(self)
2075                         self.members.append(tarinfo)
2076                     except EOFHeaderError:
2077                         self.fileobj.seek(self.offset)
2078                         break
2079                     except HeaderError as e:
2080                         raise ReadError(str(e))
2081
2082             if self.mode in "aw":
2083                 self._loaded = True
2084
2085                 if self.pax_headers:
2086                     buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
2087                     self.fileobj.write(buf)
2088                     self.offset += len(buf)
2089         except:
2090             if not self._extfileobj:
2091                 self.fileobj.close()
2092             self.closed = True
2093             raise
2094
2095     #--------------------------------------------------------------------------
2096     # Below are the classmethods which act as alternate constructors to the
2097     # TarFile class. The open() method is the only one that is needed for
2098     # public use; it is the "super"-constructor and is able to select an
2099     # adequate "sub"-constructor for a particular compression using the mapping
2100     # from OPEN_METH.
2101     #
2102     # This concept allows one to subclass TarFile without losing the comfort of
2103     # the super-constructor. A sub-constructor is registered and made available
2104     # by adding it to the mapping in OPEN_METH.
2105
2106     @classmethod
2107     def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE,
2108              encryption=None, compresslevel=9, tolerance=TOLERANCE_STRICT,
2109              **kwargs):
2110         """Open a tar archive for reading, writing or appending. Return
2111            an appropriate TarFile class.
2112
2113            mode:
2114            'r' or 'r:*' open for reading with transparent compression
2115            'r:'         open for reading exclusively uncompressed
2116            'r:gz'       open for reading with gzip compression
2117            'r:bz2'      open for reading with bzip2 compression
2118            'r:xz'       open for reading with lzma compression
2119            'a' or 'a:'  open for appending, creating the file if necessary
2120            'w' or 'w:'  open for writing without compression
2121            'w:gz'       open for writing with gzip compression
2122            'w:bz2'      open for writing with bzip2 compression
2123            'w:xz'       open for writing with lzma compression
2124
2125            'r|*'        open a stream of tar blocks with transparent compression
2126            'r|'         open an uncompressed stream of tar blocks for reading
2127            'r|gz'       open a gzip compressed stream of tar blocks
2128            'r|bz2'      open a bzip2 compressed stream of tar blocks
2129            'r|xz'       open an lzma compressed stream of tar blocks
2130            'w|'         open an uncompressed stream for writing
2131            'w|gz'       open a gzip compressed stream for writing
2132            'w|bz2'      open a bzip2 compressed stream for writing
2133            'w|xz'       open an lzma compressed stream for writing
2134
2135            'r#gz'       open a stream of gzip compressed tar blocks for reading
2136            'w#gz'       open a stream of gzip compressed tar blocks for writing
2137         """
2138         if not name and not fileobj:
2139             raise ValueError("nothing to open")
2140
2141         if mode in ("r", "r:*"):
2142             # Find out which *open() is appropriate for opening the file.
2143             for comptype in cls.OPEN_METH:
2144                 func = getattr(cls, cls.OPEN_METH[comptype])
2145                 if fileobj is not None:
2146                     saved_pos = fileobj.tell()
2147                 try:
2148                     return func(name, "r", fileobj, **kwargs)
2149                 except (ReadError, CompressionError) as e:
2150                     # usually nothing exceptional but sometimes is
2151                     if fileobj is not None:
2152                         fileobj.seek(saved_pos)
2153                     continue
2154             raise ReadError("file could not be opened successfully")
2155
2156         elif ":" in mode:
2157             filemode, comptype = mode.split(":", 1)
2158             filemode = filemode or "r"
2159             comptype = comptype or "tar"
2160
2161             # Select the *open() function according to
2162             # given compression.
2163             if comptype in cls.OPEN_METH:
2164                 func = getattr(cls, cls.OPEN_METH[comptype])
2165             else:
2166                 raise CompressionError("unknown compression type %r" % comptype)
2167
2168             # Pass on compression level for gzip / bzip2.
2169             if comptype == 'gz' or comptype == 'bz2':
2170                 kwargs['compresslevel'] = compresslevel
2171
2172             if 'max_volume_size' in kwargs:
2173                 if comptype != 'tar' and filemode in 'wa' \
2174                         and kwargs['max_volume_size']:
2175                     import warnings
2176                     warnings.warn('Only the first volume will be compressed '
2177                                   'for modes with "w:"!')
2178
2179             return func(name, filemode, fileobj, **kwargs)
2180
2181         elif "|" in mode:
2182             filemode, comptype = mode.split("|", 1)
2183             filemode = filemode or "r"
2184             comptype = comptype or "tar"
2185
2186             if filemode not in "rw":
2187                 raise ValueError("mode must be 'r' or 'w'")
2188
2189             t = cls(name, filemode,
2190                     _Stream(name, filemode, comptype, fileobj, bufsize,
2191                             compresslevel=compresslevel),
2192                     **kwargs)
2193             t._extfileobj = False
2194             return t
2195
2196         elif "#" in mode:
2197             filemode, comptype = mode.split("#", 1)
2198             filemode = filemode or "r"
2199
2200             if filemode not in "rw":
2201                 raise ValueError ("mode %s not compatible with concat "
2202                                   "archive; must be 'r' or 'w'" % mode)
2203
2204             stream = _Stream(name, filemode, comptype, fileobj, bufsize,
2205                              concat=True, encryption=encryption,
2206                              compresslevel=compresslevel, tolerance=tolerance)
2207             kwargs ["concat"] = True
2208             try:
2209                 t = cls(name, filemode, stream, **kwargs)
2210             except: # XXX except what?
2211                 stream.close()
2212                 raise # XXX raise what?
2213             t._extfileobj = False
2214             return t
2215
2216         elif mode in "aw":
2217             return cls.taropen(name, mode, fileobj, **kwargs)
2218
2219         raise ValueError("undiscernible mode %r" % mode)
2220
2221
2222     @classmethod
2223     def open_at_offset(cls, offset, *a, **kwa):
2224         """
2225         Same as ``.open()``, but start reading at the given offset. Assumes a
2226         seekable file object.
2227         """
2228         fileobj = kwa.get ("fileobj")
2229         if fileobj is not None:
2230             fileobj.seek (offset)
2231         return cls.open (*a, **kwa)
2232
2233
2234     @classmethod
2235     def taropen(cls, name, mode="r", fileobj=None, **kwargs):
2236         """Open uncompressed tar archive name for reading or writing.
2237         """
2238         if len(mode) > 1 or mode not in "raw":
2239             raise ValueError("mode must be 'r', 'a' or 'w'")
2240         return cls(name, mode, fileobj, **kwargs)
2241
    @classmethod
    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open gzip compressed tar archive name for reading or writing.
           Appending is not allowed.

           Raises ValueError if `mode' is neither 'r' nor 'w',
           CompressionError if the gzip module cannot be used and
           ReadError if an OSError occurs while the archive is opened,
           except when creating the GzipFile from `name' itself failed:
           in that case the OSError is passed on unchanged.
        """
        if len(mode) > 1 or mode not in "rw":
            raise ValueError("mode must be 'r' or 'w'")

        try:
            import gzip
            # The bare attribute access raises AttributeError if the
            # imported module does not provide GzipFile.
            gzip.GzipFile
        except (ImportError, AttributeError):
            raise CompressionError("gzip module is not available")

        # Remember whether the caller supplied the file object; `fileobj'
        # is rebound to the GzipFile wrapper below.
        extfileobj = fileobj is not None
        try:
            fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except OSError:
            # Close the wrapper only if it was created here from `name'.
            if not extfileobj and fileobj is not None:
                fileobj.close()
            # Still None: GzipFile() itself failed and there was no file
            # object from the caller, so this is a genuine I/O error.
            if fileobj is None:
                raise
            raise ReadError("not a gzip file")
        except:
            if not extfileobj and fileobj is not None:
                fileobj.close()
            raise
        # NOTE(review): with a caller supplied file object this marks the
        # GzipFile wrapper as external, so TarFile.close() presumably
        # never closes it - confirm that the gzip stream still gets
        # finalized in write mode.
        t._extfileobj = extfileobj
        return t
2272
2273     @classmethod
2274     def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
2275         """Open bzip2 compressed tar archive name for reading or writing.
2276            Appending is not allowed.
2277         """
2278         if len(mode) > 1 or mode not in "rw":
2279             raise ValueError("mode must be 'r' or 'w'.")
2280
2281         try:
2282             import bz2
2283         except ImportError:
2284             raise CompressionError("bz2 module is not available")
2285
2286         fileobj = bz2.BZ2File(fileobj or name, mode,
2287                               compresslevel=compresslevel)
2288
2289         try:
2290             t = cls.taropen(name, mode, fileobj, **kwargs)
2291         except (OSError, EOFError):
2292             fileobj.close()
2293             raise ReadError("not a bzip2 file")
2294         t._extfileobj = False
2295         return t
2296
2297     @classmethod
2298     def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
2299         """Open lzma compressed tar archive name for reading or writing.
2300            Appending is not allowed.
2301         """
2302         if mode not in ("r", "w"):
2303             raise ValueError("mode must be 'r' or 'w'")
2304
2305         try:
2306             import lzma
2307         except ImportError:
2308             raise CompressionError("lzma module is not available")
2309
2310         fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)
2311
2312         try:
2313             t = cls.taropen(name, mode, fileobj, **kwargs)
2314         except (lzma.LZMAError, EOFError):
2315             fileobj.close()
2316             raise ReadError("not an lzma file")
2317         t._extfileobj = False
2318         return t
2319
    # All *open() methods are registered here.
    # Keys are the compression type identifiers, values are the names of
    # the classmethods defined above that open an archive of that type.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open",   # bzip2 compressed tar
        "xz":  "xzopen"     # lzma compressed tar
    }
2327
2328     #--------------------------------------------------------------------------
2329     # The public methods which TarFile provides:
2330
2331     def close(self):
2332         """Close the TarFile. In write-mode, two finishing zero blocks are
2333            appended to the archive. A special case are empty archives which are
2334            initialized accordingly so the two mandatory blocks of zeros are
2335            written abiding by the requested encryption and compression settings.
2336         """
2337         if self.closed:
2338             return
2339
2340         if self.mode in "aw":
2341             if self.arcmode & ARCMODE_CONCAT and self.fileobj.tell () == 0:
2342                 self.fileobj.next ("")
2343             self.fileobj.write(NUL * (BLOCKSIZE * 2))
2344             self.offset += (BLOCKSIZE * 2)
2345             # fill up the end with zero-blocks
2346             # (like option -b20 for tar does)
2347             blocks, remainder = divmod(self.offset, RECORDSIZE)
2348             if remainder > 0:
2349                 self.fileobj.write(NUL * (RECORDSIZE - remainder))
2350         if not self._extfileobj:
2351             self.fileobj.close()
2352         self.closed = True
2353
2354     def getmember(self, name):
2355         """Return a TarInfo object for member `name'. If `name' can not be
2356            found in the archive, KeyError is raised. If a member occurs more
2357            than once in the archive, its last occurrence is assumed to be the
2358            most up-to-date version.
2359         """
2360         tarinfo = self._getmember(name)
2361         if tarinfo is None:
2362             raise KeyError("filename %r not found" % name)
2363         return tarinfo
2364
2365     def getmembers(self):
2366         """Return the members of the archive as a list of TarInfo objects. The
2367            list has the same order as the members in the archive.
2368         """
2369         self._check()
2370         if not self._loaded:    # if we want to obtain a list of
2371             self._load()        # all members, we first have to
2372                                 # scan the whole archive.
2373         return self.members
2374
2375     def get_last_member_offset(self):
2376         """Return the last member offset. Usually this is self.fileobj.tell(),
2377            but when there's encryption or concat compression going on it's more
2378            complicated than that.
2379         """
2380         return self.last_block_offset
2381
2382     def getnames(self):
2383         """Return the members of the archive as a list of their names. It has
2384            the same order as the list returned by getmembers().
2385         """
2386         return [tarinfo.name for tarinfo in self.getmembers()]
2387
    def gettarinfo(self, name=None, arcname=None, fileobj=None):
        """Create a TarInfo object for either the file `name' or the file
           object `fileobj' (using os.fstat on its file descriptor). You can
           modify some of the TarInfo's attributes before you add it using
           addfile(). If given, `arcname' specifies an alternative name for the
           file in the archive.

           Returns None if the file is none of: regular file, directory,
           fifo, symbolic link, character device or block device.
           The archive must be open for writing or appending.
        """
        self._check("aw")

        # When fileobj is given, replace name by
        # fileobj's real name.
        if fileobj is not None:
            name = fileobj.name

        # Building the name of the member in the archive.
        # Backward slashes are converted to forward slashes,
        # Absolute paths are turned to relative paths.
        if arcname is None:
            arcname = name
        drv, arcname = os.path.splitdrive(arcname)
        arcname = arcname.replace(os.sep, "/")
        arcname = arcname.lstrip("/")

        # Now, fill the TarInfo object with
        # information specific for the file.
        tarinfo = self.tarinfo()
        tarinfo.tarfile = self

        # Use os.stat or os.lstat, depending on platform
        # and if symlinks shall be resolved.
        if fileobj is None:
            if hasattr(os, "lstat") and not self.dereference:
                statres = os.lstat(name)
            else:
                statres = os.stat(name)
        else:
            statres = os.fstat(fileobj.fileno())
        linkname = ""

        stmd = statres.st_mode
        if stat.S_ISREG(stmd):
            # (inode number, device) identifies the file's data on disk.
            inode = (statres.st_ino, statres.st_dev)
            if not self.dereference and statres.st_nlink > 1 and \
                    inode in self.inodes and arcname != self.inodes[inode]:
                # Is it a hardlink to an already
                # archived file?
                type = LNKTYPE
                linkname = self.inodes[inode]
            else:
                # The inode is added only if its valid.
                # For win32 it is always 0.
                type = REGTYPE
                # The inode cache is only filled while members are being
                # recorded (save_to_members).
                if inode[0] and self.save_to_members:
                    self.inodes[inode] = arcname
        elif stat.S_ISDIR(stmd):
            type = DIRTYPE
        elif stat.S_ISFIFO(stmd):
            type = FIFOTYPE
        elif stat.S_ISLNK(stmd):
            type = SYMTYPE
            linkname = os.readlink(name)
        elif stat.S_ISCHR(stmd):
            type = CHRTYPE
        elif stat.S_ISBLK(stmd):
            type = BLKTYPE
        else:
            # Any other file type cannot be represented: signal that with None.
            return None

        # Fill the TarInfo object with all
        # information we can get.
        tarinfo.name = arcname
        tarinfo.mode = stmd
        tarinfo.uid = statres.st_uid
        tarinfo.gid = statres.st_gid
        # Only regular files carry data in the archive.
        if type == REGTYPE:
            tarinfo.size = statres.st_size
        else:
            tarinfo.size = 0
        tarinfo.mtime = statres.st_mtime
        tarinfo.type = type
        tarinfo.linkname = linkname
        # Resolve the user name, caching results per uid to avoid repeated
        # pwd lookups.
        if pwd:
            if tarinfo.uid in self.cache_uid2user:
                tarinfo.uname = self.cache_uid2user[tarinfo.uid]
            else:
                try:
                    tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
                    self.cache_uid2user[tarinfo.uid] = tarinfo.uname
                except KeyError:
                    # remember user does not exist:
                    # same default value as in tarinfo class
                    # (tarinfo.uname itself is left untouched here)
                    self.cache_uid2user[tarinfo.uid] = ""
        # Same scheme for the group name, cached per gid.
        if grp:
            if tarinfo.gid in self.cache_gid2group:
                tarinfo.gname = self.cache_gid2group[tarinfo.gid]
            else:
                try:
                    tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
                    self.cache_gid2group[tarinfo.gid] = tarinfo.gname
                except KeyError:
                    # remember group does not exist:
                    # same default value as in tarinfo class
                    # (tarinfo.gname itself is left untouched here)
                    self.cache_gid2group[tarinfo.gid] = ""

        # Device nodes additionally record their major/minor numbers where
        # the platform can compute them.
        if type in (CHRTYPE, BLKTYPE):
            if hasattr(os, "major") and hasattr(os, "minor"):
                tarinfo.devmajor = os.major(statres.st_rdev)
                tarinfo.devminor = os.minor(statres.st_rdev)
        return tarinfo
2497
2498     def list(self, verbose=True):
2499         """Print a table of contents to sys.stdout. If `verbose' is False, only
2500            the names of the members are printed. If it is True, an `ls -l'-like
2501            output is produced.
2502         """
2503         self._check()
2504
2505         for tarinfo in self:
2506             if verbose:
2507                 print(stat.filemode(tarinfo.mode), end=' ')
2508                 print("%s/%s" % (tarinfo.uname or tarinfo.uid,
2509                                  tarinfo.gname or tarinfo.gid), end=' ')
2510                 if tarinfo.ischr() or tarinfo.isblk():
2511                     print("%10s" % ("%d,%d" \
2512                                     % (tarinfo.devmajor, tarinfo.devminor)), end=' ')
2513                 else:
2514                     print("%10d" % tarinfo.size, end=' ')
2515                 print("%d-%02d-%02d %02d:%02d:%02d" \
2516                       % time.localtime(tarinfo.mtime)[:6], end=' ')
2517
2518             print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ')
2519
2520             if verbose:
2521                 if tarinfo.issym():
2522                     print("->", tarinfo.linkname, end=' ')
2523                 if tarinfo.islnk():
2524                     print("link to", tarinfo.linkname, end=' ')
2525             print()
2526
2527     def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
2528         """Add the file `name' to the archive. `name' may be any type of file
2529            (directory, fifo, symbolic link, etc.). If given, `arcname'
2530            specifies an alternative name for the file in the archive.
2531            Directories are added recursively by default. This can be avoided by
2532            setting `recursive' to False. `exclude' is a function that should
2533            return True for each filename to be excluded. `filter' is a function
2534            that expects a TarInfo object argument and returns the changed
2535            TarInfo object, if it returns None the TarInfo object will be
2536            excluded from the archive.
2537         """
2538         self._check("aw")
2539
2540         if arcname is None:
2541             arcname = name
2542
2543         # Exclude pathnames.
2544         if exclude is not None:
2545             import warnings
2546             warnings.warn("use the filter argument instead",
2547                     DeprecationWarning, 2)
2548             if exclude(name):
2549                 self._dbg(2, "tarfile: Excluded %r" % name)
2550                 return
2551
2552         # Skip if somebody tries to archive the archive...
2553         if self.name is not None and os.path.abspath(name) == self.name:
2554             self._dbg(2, "tarfile: Skipped %r" % name)
2555             return
2556
2557         self._dbg(1, name)
2558
2559         # Create a TarInfo object from the file.
2560         tarinfo = self.gettarinfo(name, arcname)
2561
2562         if tarinfo is None:
2563             self._dbg(1, "tarfile: Unsupported type %r" % name)
2564             return
2565
2566         # Change or exclude the TarInfo object.
2567         if filter is not None:
2568             tarinfo = filter(tarinfo)
2569             if tarinfo is None:
2570                 self._dbg(2, "tarfile: Excluded %r" % name)
2571                 return
2572
2573         # Append the tar header and data to the archive.
2574         if tarinfo.isreg():
2575             with bltn_open(name, "rb") as f:
2576                 self.addfile(tarinfo, f)
2577
2578         elif tarinfo.isdir():
2579             self.addfile(tarinfo)
2580             if recursive:
2581                 for f in os.listdir(name):
2582                     self.add(os.path.join(name, f), os.path.join(arcname, f),
2583                             recursive, exclude, filter=filter)
2584
2585         else:
2586             self.addfile(tarinfo)
2587
2588     def _size_left_file(self):
2589         """Calculates size left in a volume with a maximum volume size.
2590
2591         Assumes self.max_volume_size is set.
2592         If using compression through a _Stream, use _size_left_stream instead
2593         """
2594         # left-over size = max_size - offset - 2 zero-blocks written in close
2595         size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
2596         # limit size left to a discrete number of blocks, because we won't
2597         # write only half a block when writting the end of a volume
2598         # and filling with zeros
2599         return BLOCKSIZE * (size_left // BLOCKSIZE)
2600
2601     def _size_left_stream(self):
2602         """ Calculates size left in a volume if using comression/encryption
2603
2604         Assumes self.max_volume_size is set and self.fileobj is a _Stream
2605         (otherwise use _size_left_file)
2606         """
2607         # left-over size = max_size - bytes written - 2 zero-blocks (close)
2608         size_left = self.max_volume_size - self.fileobj.estim_file_size() \
2609             - 2*BLOCKSIZE
2610         return BLOCKSIZE * (size_left // BLOCKSIZE)
2611
    def addfile(self, tarinfo, fileobj=None):
        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
           given, tarinfo.size bytes are read from it and added to the archive.
           You can create TarInfo objects using gettarinfo().
           On Windows platforms, `fileobj' should always be opened with mode
           'rb' to avoid irritation about the file size.

           If self.max_volume_size is set and the data does not fit into the
           current volume, the member is split: self.new_volume_handler is
           called to switch to the next volume and the rest of the data is
           written there under a GNUTYPE_MULTIVOL header. An Exception is
           raised if a new volume is needed but no callable
           new_volume_handler has been set.
        """
        self._check("aw")

        # Work on a copy so the caller's object is not modified
        # (type and volume_offset may be changed below).
        tarinfo = copy.copy(tarinfo)

        # Remember where this member starts. In concat mode the stream is
        # told to begin a new entry and reports the offset itself.
        if self.arcmode & ARCMODE_CONCAT:
            self.last_block_offset = self.fileobj.next (tarinfo.name)
        else:
            self.last_block_offset = self.fileobj.tell()

        # Write the member's header.
        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
        self.fileobj.write(buf)
        self.offset += len(buf)

        # Pick the function that tells how much room is left in the volume.
        # Without a volume size limit, the whole member always "fits".
        if self.max_volume_size:
            if isinstance(self.fileobj, _Stream):
                _size_left = self._size_left_stream
            else:
                _size_left = self._size_left_file
        else:
            _size_left = lambda: tarinfo.size

        # If there's no data to follow, finish
        if not fileobj:
            if self.save_to_members:
                self.members.append(tarinfo)
            return

        target_size_left = _size_left()
        source_size_left = tarinfo.size
        assert tarinfo.volume_offset == 0

        # we only split volumes in the middle of a file, that means we have
        # to write at least one block
        if target_size_left < BLOCKSIZE:
            target_size_left = BLOCKSIZE

        # loop over multiple volumes
        while source_size_left > 0:

            # Write as much data as possible from source into target.
            # When compressing data, we cannot easily predict how much data we
            # can write until target_size_left == 0 --> need to iterate
            size_can_write = min(target_size_left, source_size_left)

            while size_can_write > 0:
                copyfileobj(fileobj, self.fileobj, size_can_write)
                self.offset += size_can_write
                source_size_left -= size_can_write
                target_size_left = _size_left()
                size_can_write = min(target_size_left, source_size_left)

            # now target_size_left == 0 or source_size_left == 0

            # if there is data left to write, we need to create a new volume
            if source_size_left > 0:
                # Only finalize the crypto entry here if we're continuing with
                # another one; otherwise, the encryption must include the block
                # padding below.
                # NOTE(review): no crypto finalization is visible at this
                # point; the remark above may refer to what the volume
                # handler / stream does -- confirm.
                tarinfo.type = GNUTYPE_MULTIVOL

                if not self.new_volume_handler or\
                    not callable(self.new_volume_handler):
                    raise Exception("We need to create a new volume and you "
                                    "didn't supply a new_volume_handler")


                # the new volume handler should do everything needed to
                # start working in a new volume. usually, the handler calls
                # to self.open_volume
                self.volume_number += 1

                # set to be used by open_volume, because in the case of a PAX
                # tar it needs to write information about the volume and offset
                # in the global header
                tarinfo.volume_offset = tarinfo.size - source_size_left
                self.volume_tarinfo = tarinfo

                # the "new_volume_handler" is supposed to call .close() on the
                # "fileobj" _Stream
                self.new_volume_handler(self, self.base_name, self.volume_number)

                self.volume_tarinfo = None

                if self.arcmode & ARCMODE_CONCAT:
                    self.fileobj.next_volume (tarinfo.name)

                # write new volume header
                buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
                self.fileobj.write(buf)
                self.offset += len(buf)

                # adjust variables; open_volume should have reset self.offset
                # --> _size_left should be big again
                target_size_left = _size_left()
                # (size_can_write is recomputed at the top of the outer loop)
                size_can_write = min(target_size_left, source_size_left)
                self._dbg(3, 'new volume')

        # now, all data has been written. We may have to fill up the rest of
        # the block in target with 0s
        # (only the part of the member stored in the current volume counts)
        remainder = (tarinfo.size - tarinfo.volume_offset) % BLOCKSIZE
        if remainder > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            self.offset += BLOCKSIZE - remainder

        if self.save_to_members:
            self.members.append(tarinfo)
2725
    def open_volume(self, name="", fileobj=None, encryption=None):
        '''
        Called by the user to change this tar file to point to a new volume.

        Either `name' (a path to open) or `fileobj' (an already opened file
        object, which is then treated as external and not closed by this
        object) selects the new volume. `encryption' overrides the encryption
        object of the current _Stream when a new _Stream is created; if it is
        not given the current one is reused.

        The member list, inode cache and offset are reset. In read mode the
        first member of the new volume is loaded, in append mode the position
        is moved behind the last member, and in write/append mode with PAX
        format a global header describing the continued member is written.
        If anything of that fails, the file object is closed (unless it is
        external), the archive is marked closed and the error is re-raised.
        '''
        # open the file using either fileobj or name
        if not fileobj:
            if self.mode == "a" and not os.path.exists(name):
                # Create nonexistent files in append mode.
                self.mode = "w"
                self._mode = "wb"
            self._extfileobj = False

            if isinstance(self.fileobj, _Stream):
                # Carry the settings of the current stream over to the
                # stream of the new volume.
                self._dbg(3, 'open_volume: create a _Stream')
                fileobj = _Stream(name=name,
                            mode=self.fileobj.mode,
                            comptype=self.fileobj.comptype,
                            fileobj=None,
                            bufsize=self.fileobj.bufsize,
                            encryption=encryption or self.fileobj.encryption,
                            concat=self.fileobj.arcmode & ARCMODE_CONCAT)
            else:
                # here, we lose information about compression/encryption!
                self._dbg(3, 'open_volume: builtin open')
                fileobj = bltn_open(name, self._mode)
        else:
            # NOTE(review): `name' defaults to "", so this branch only takes
            # the file object's name when the caller passes name=None
            # explicitly -- confirm that is intended.
            if name is None and hasattr(fileobj, "name"):
                name = fileobj.name
            if hasattr(fileobj, "mode"):
                self._mode = fileobj.mode
            self._extfileobj = True
            # NOTE(review): passes an extra positional argument to _dbg();
            # presumably _dbg() formats the message with it -- verify against
            # its definition.
            self._dbg(3, 'open_volume: using external fileobj {}', fileobj)
        self.name = os.path.abspath(name) if name else None
        self.fileobj = fileobj

        # init data structures
        self.closed = False
        self.members = []       # list of members as TarInfo objects
        self._loaded = False    # flag if all members have been read
        self.offset = self.fileobj.tell()
                                # current position in the archive file
        self.inodes = {}        # dictionary caching the inodes of
                                # archive members already added

        try:
            if self.mode == "r":
                # Reset first so next() does not see the first member of
                # the previous volume.
                self.firstmember = None
                self.firstmember = self.next()

            if self.mode == "a":
                # Move to the end of the archive,
                # before the first empty block.
                while True:
                    self.fileobj.seek(self.offset)
                    try:
                        tarinfo = self.tarinfo.fromtarfile(self)
                        self.members.append(tarinfo)
                    except EOFHeaderError:
                        self.fileobj.seek(self.offset)
                        break
                    except HeaderError as e:
                        raise ReadError(str(e))

            if self.mode in "aw":
                self._loaded = True

                if  self.format == PAX_FORMAT:
                    # Describe the member that is continued in this volume.
                    # NOTE(review): this reads self.volume_tarinfo, which
                    # addfile() sets only around the new_volume_handler
                    # call -- presumably open_volume() is expected to be
                    # called from that handler when writing; confirm.
                    volume_info = {
                        "GNU.volume.filename": str(self.volume_tarinfo.name),
                        "GNU.volume.size": str(self.volume_tarinfo.size - self.volume_tarinfo.volume_offset),
                        "GNU.volume.offset": str(self.volume_tarinfo.volume_offset),
                    }

                    self.pax_headers.update(volume_info)

                    if isinstance(self.fileobj, _Stream):
                        self.fileobj._init_write_gz ()
                    buf = self.tarinfo.create_pax_global_header(volume_info.copy())
                    self.fileobj.write(buf)
                    self.offset += len(buf)
        except Exception as exn:
            # Do not leave a half-initialized volume open.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise
2811
2812     def extractall(self, path=".", members=None, filter=None):
2813         """Extract all members from the archive to the current working
2814            directory and set owner, modification time and permissions on
2815            directories afterwards. `path' specifies a different directory
2816            to extract to. `members' is optional and must be a subset of the
2817            list returned by getmembers().
2818         """
2819         directories = []
2820
2821         if members is None:
2822             members = self
2823
2824         for tarinfo in members:
2825             if self.volume_number > 0 and tarinfo.ismultivol():
2826                 continue
2827
2828             if filter and not filter(tarinfo):
2829                 continue
2830
2831             if tarinfo.isdir():
2832                 # Extract directories with a safe mode.
2833                 directories.append(tarinfo)
2834                 tarinfo = copy.copy(tarinfo)
2835                 tarinfo.mode = 0o0700
2836             # Do not set_attrs directories, as we will do that further down
2837             self.extract(tarinfo, path, set_attrs=not tarinfo.isdir())
2838
2839         # Reverse sort directories.
2840         directories.sort(key=lambda a: a.name)
2841         directories.reverse()
2842
2843         # Set correct owner, mtime and filemode on directories.
2844         for tarinfo in directories:
2845             dirpath = os.path.join(path, tarinfo.name)
2846             try:
2847                 self.chown(tarinfo, dirpath)
2848                 self.utime(tarinfo, dirpath)
2849                 self.chmod(tarinfo, dirpath)
2850             except ExtractError as e:
2851                 if self.errorlevel > 1:
2852                     raise
2853                 else:
2854                     self._dbg(1, "tarfile: %s" % e)
2855
2856     def extract(self, member, path="", set_attrs=True, symlink_cb=None):
2857         """Extract a member from the archive to the current working directory,
2858            using its full name. Its file information is extracted as accurately
2859            as possible. `member' may be a filename or a TarInfo object. You can
2860            specify a different directory using `path'. File attributes (owner,
2861            mtime, mode) are set unless `set_attrs' is False.
2862            ``symlink_cb`` is a hook accepting a function that is passed the
2863            ``member``, ``path``, and ``set_attrs`` arguments if the tarinfo for
2864            ``member`` indicates a symlink in which case only the callback
2865            passed will be applied, skipping the actual extraction. In case the
2866            callback is invoked, its return value is passed on to the caller.
2867         """
2868         self._check("r")
2869
2870         if isinstance(member, str):
2871             tarinfo = self.getmember(member)
2872         else:
2873             tarinfo = member
2874
2875         # Prepare the link target for makelink().
2876         if tarinfo.islnk():
2877             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2878
2879         if symlink_cb is not None and tarinfo.issym():
2880             return symlink_cb(member, path, set_attrs)
2881
2882         try:
2883             self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
2884                                  set_attrs=set_attrs)
2885         except EnvironmentError as e:
2886             if self.errorlevel > 0:
2887                 raise
2888             else:
2889                 if e.filename is None:
2890                     self._dbg(1, "tarfile: %s" % e.strerror)
2891                 else:
2892                     self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2893         except ExtractError as e:
2894             if self.errorlevel > 1:
2895                 raise
2896             else:
2897                 self._dbg(1, "tarfile: %s" % e)
2898
2899     def extractfile(self, member):
2900         """Extract a member from the archive as a file object. `member' may be
2901            a filename or a TarInfo object. If `member' is a regular file or a
2902            link, an io.BufferedReader object is returned. Otherwise, None is
2903            returned.
2904         """
2905         self._check("r")
2906
2907         if isinstance(member, str):
2908             tarinfo = self.getmember(member)
2909         else:
2910             tarinfo = member
2911
2912         if tarinfo.isreg() or tarinfo.ismultivol() or\
2913             tarinfo.type not in SUPPORTED_TYPES:
2914             # If a member's type is unknown, it is treated as a
2915             # regular file.
2916             return self.fileobject(self, tarinfo)
2917
2918         elif tarinfo.islnk() or tarinfo.issym():
2919             if isinstance(self.fileobj, _Stream):
2920                 # A small but ugly workaround for the case that someone tries
2921                 # to extract a (sym)link as a file-object from a non-seekable
2922                 # stream of tar blocks.
2923                 raise StreamError("cannot extract (sym)link as file object")
2924             else:
2925                 # A (sym)link's file object is its target's file object.
2926                 return self.extractfile(self._find_link_target(tarinfo))
2927         else:
2928             # If there's no data associated with the member (directory, chrdev,
2929             # blkdev, etc.), return None instead of a file object.
2930             return None
2931
2932     def _extract_member(self, tarinfo, targetpath, set_attrs=True):
2933         """Extract the TarInfo object tarinfo to a physical
2934            file called targetpath.
2935         """
2936         # Fetch the TarInfo object for the given name
2937         # and build the destination pathname, replacing
2938         # forward slashes to platform specific separators.
2939         targetpath = targetpath.rstrip("/")
2940         targetpath = targetpath.replace("/", os.sep)
2941
2942         # Create all upper directories.
2943         upperdirs = os.path.dirname(targetpath)
2944         if upperdirs and not os.path.exists(upperdirs):
2945             # Create directories that are not part of the archive with
2946             # default permissions.
2947             os.makedirs(upperdirs)
2948
2949         if tarinfo.islnk() or tarinfo.issym():
2950             self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2951         else:
2952             self._dbg(1, tarinfo.name)
2953
2954         if tarinfo.isreg():
2955             self.makefile(tarinfo, targetpath)
2956         elif tarinfo.isdir():
2957             self.makedir(tarinfo, targetpath)
2958         elif tarinfo.isfifo():
2959             self.makefifo(tarinfo, targetpath)
2960         elif tarinfo.ischr() or tarinfo.isblk():
2961             self.makedev(tarinfo, targetpath)
2962         elif tarinfo.islnk() or tarinfo.issym():
2963             self.makelink(tarinfo, targetpath)
2964         elif tarinfo.type not in SUPPORTED_TYPES:
2965             self.makeunknown(tarinfo, targetpath)
2966         else:
2967             self.makefile(tarinfo, targetpath)
2968
2969         if set_attrs:
2970             self.chown(tarinfo, targetpath)
2971             if not tarinfo.issym():
2972                 self.chmod(tarinfo, targetpath)
2973                 self.utime(tarinfo, targetpath)
2974
2975     #--------------------------------------------------------------------------
2976     # Below are the different file methods. They are called via
2977     # _extract_member() when extract() is called. They can be replaced in a
2978     # subclass to implement other functionality.
2979
2980     def makedir(self, tarinfo, targetpath):
2981         """Make a directory called targetpath.
2982         """
2983         try:
2984             # Use a safe mode for the directory, the real mode is set
2985             # later in _extract_member().
2986             os.mkdir(targetpath, 0o0700)
2987         except FileExistsError:
2988             pass
2989
2990     def makefile(self, tarinfo, targetpath):
2991         """Make a file called targetpath.
2992         """
2993         source = self.fileobj
2994         source.seek(tarinfo.offset_data)
2995         decrypt = False
2996         iterate = True
2997         target = bltn_open(targetpath, "wb")
2998
2999         if tarinfo.sparse is not None:
3000             try:
3001                 for offset, size in tarinfo.sparse:
3002                     target.seek(offset)
3003                     copyfileobj(source, target, size)
3004                 target.seek(tarinfo.size)
3005                 target.truncate()
3006             finally:
3007                 target.close()
3008                 return
3009
3010         while iterate:
3011             iterate = False
3012             try:
3013                 copyfileobj(source, target, tarinfo.size)
3014             except OSError:
3015                 source.close()
3016                 # only if we are extracting a multivolume this can be treated
3017                 if not self.new_volume_handler:
3018                     target.close()
3019                     raise Exception("We need to read a new volume and you"
3020                         " didn't supply a new_volume_handler")
3021
3022                 # the new volume handler should do everything needed to
3023                 # start working in a new volume. usually, the handler calls
3024                 # to self.open_volume
3025                 self.volume_number += 1
3026                 self.new_volume_handler(self, self.base_name, self.volume_number)
3027                 tarinfo = self.firstmember
3028                 source = self.fileobj
3029                 iterate = True
3030         target.close()
3031
3032
3033     def makeunknown(self, tarinfo, targetpath):
3034         """Make a file from a TarInfo object with an unknown type
3035            at targetpath.
3036         """
3037         self.makefile(tarinfo, targetpath)
3038         self._dbg(1, "tarfile: Unknown file type %r, " \
3039                      "extracted as regular file." % tarinfo.type)
3040
3041     def makefifo(self, tarinfo, targetpath):
3042         """Make a fifo called targetpath.
3043         """
3044         if hasattr(os, "mkfifo"):
3045             os.mkfifo(targetpath)
3046         else:
3047             raise ExtractError("fifo not supported by system")
3048
3049     def makedev(self, tarinfo, targetpath):
3050         """Make a character or block device called targetpath.
3051         """
3052         if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
3053             raise ExtractError("special devices not supported by system")
3054
3055         mode = tarinfo.mode
3056         if tarinfo.isblk():
3057             mode |= stat.S_IFBLK
3058         else:
3059             mode |= stat.S_IFCHR
3060
3061         os.mknod(targetpath, mode,
3062                  os.makedev(tarinfo.devmajor, tarinfo.devminor))
3063
3064     def makelink(self, tarinfo, targetpath):
3065         """Make a (symbolic) link called targetpath. If it cannot be created
3066           (platform limitation), we try to make a copy of the referenced file
3067           instead of a link.
3068         """
3069         try:
3070             # For systems that support symbolic and hard links.
3071             if tarinfo.issym():
3072                 os.symlink(tarinfo.linkname, targetpath)
3073             else:
3074                 # See extract().
3075                 if os.path.exists(tarinfo._link_target):
3076                     os.link(tarinfo._link_target, targetpath)
3077                 else:
3078                     self._extract_member(self._find_link_target(tarinfo),
3079                                          targetpath)
3080         except symlink_exception:
3081             try:
3082                 self._extract_member(self._find_link_target(tarinfo),
3083                                      targetpath)
3084             except KeyError:
3085                 raise ExtractError("unable to resolve link inside archive")
3086
3087     def chown(self, tarinfo, targetpath):
3088         """Set owner of targetpath according to tarinfo.
3089         """
3090         if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
3091             # We have to be root to do so.
3092             try:
3093                 g = grp.getgrnam(tarinfo.gname)[2]
3094             except KeyError:
3095                 g = tarinfo.gid
3096             try:
3097                 u = pwd.getpwnam(tarinfo.uname)[2]
3098             except KeyError:
3099                 u = tarinfo.uid
3100             try:
3101                 if tarinfo.issym() and hasattr(os, "lchown"):
3102                     os.lchown(targetpath, u, g)
3103                 else:
3104                     os.chown(targetpath, u, g)
3105             except OSError as e:
3106                 raise ExtractError("could not change owner")
3107
3108     def chmod(self, tarinfo, targetpath):
3109         """Set file permissions of targetpath according to tarinfo.
3110         """
3111         if hasattr(os, 'chmod'):
3112             try:
3113                 os.chmod(targetpath, tarinfo.mode)
3114             except OSError as e:
3115                 raise ExtractError("could not change mode")
3116
3117     def utime(self, tarinfo, targetpath):
3118         """Set modification time of targetpath according to tarinfo.
3119         """
3120         if not hasattr(os, 'utime'):
3121             return
3122         try:
3123             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
3124         except OSError as e:
3125             raise ExtractError("could not change modification time")
3126
3127     #--------------------------------------------------------------------------
3128     def next(self):
3129         """Return the next member of the archive as a TarInfo object, when
3130            TarFile is opened for reading. Return None if there is no more
3131            available.
3132         """
3133         self._check("ra")
3134         if self.firstmember is not None:
3135             m = self.firstmember
3136             self.firstmember = None
3137             return m
3138
3139         # Read the next block.
3140         self.fileobj.seek(self.offset)
3141         tarinfo = None
3142         while True:
3143             try:
3144                 tarinfo = self.tarinfo.fromtarfile(self)
3145             except EOFHeaderError as e:
3146                 if self.ignore_zeros:
3147                     self._dbg(2, "0x%X: %s" % (self.offset, e))
3148                     self.offset += BLOCKSIZE
3149                     continue
3150             except InvalidHeaderError as e:
3151                 if self.ignore_zeros:
3152                     self._dbg(2, "0x%X: %s" % (self.offset, e))
3153                     self.offset += BLOCKSIZE
3154                     continue
3155                 elif self.offset == 0:
3156                     raise ReadError(str(e))
3157             except EmptyHeaderError:
3158                 if self.offset == 0:
3159                     raise ReadError("empty file")
3160             except TruncatedHeaderError as e:
3161                 if self.offset == 0:
3162                     raise ReadError(str(e))
3163             except SubsequentHeaderError as e:
3164                 raise ReadError(str(e))
3165             break
3166
3167         if tarinfo is not None:
3168             if self.save_to_members:
3169                 self.members.append(tarinfo)
3170         else:
3171             self._loaded = True
3172
3173         return tarinfo
3174
3175     #--------------------------------------------------------------------------
3176     # Little helper methods:
3177
3178     def _getmember(self, name, tarinfo=None, normalize=False):
3179         """Find an archive member by name from bottom to top.
3180            If tarinfo is given, it is used as the starting point.
3181         """
3182         # Ensure that all members have been loaded.
3183         members = self.getmembers()
3184
3185         # Limit the member search list up to tarinfo.
3186         if tarinfo is not None:
3187             members = members[:members.index(tarinfo)]
3188
3189         if normalize:
3190             name = os.path.normpath(name)
3191
3192         for member in reversed(members):
3193             if normalize:
3194                 member_name = os.path.normpath(member.name)
3195             else:
3196                 member_name = member.name
3197
3198             if name == member_name:
3199                 return member
3200
3201     def _load(self):
3202         """Read through the entire archive file and look for readable
3203            members.
3204         """
3205         while True:
3206             tarinfo = self.next()
3207             if tarinfo is None:
3208                 break
3209         self._loaded = True
3210
3211     def _check(self, mode=None):
3212         """Check if TarFile is still open, and if the operation's mode
3213            corresponds to TarFile's mode.
3214         """
3215         if self.closed:
3216             raise OSError("%s is closed" % self.__class__.__name__)
3217         if mode is not None and self.mode not in mode:
3218             raise OSError("bad operation for mode %r" % self.mode)
3219
3220     def _find_link_target(self, tarinfo):
3221         """Find the target member of a symlink or hardlink member in the
3222            archive.
3223         """
3224         if tarinfo.issym():
3225             # Always search the entire archive.
3226             linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
3227             limit = None
3228         else:
3229             # Search the archive before the link, because a hard link is
3230             # just a reference to an already archived file.
3231             linkname = tarinfo.linkname
3232             limit = tarinfo
3233
3234         member = self._getmember(linkname, tarinfo=limit, normalize=True)
3235         if member is None:
3236             raise KeyError("linkname %r not found" % linkname)
3237         return member
3238
3239     def __iter__(self):
3240         """Provide an iterator object.
3241         """
3242         if self._loaded:
3243             return iter(self.members)
3244         else:
3245             return TarIter(self)
3246
3247     def _dbg(self, level, msg, *args):
3248         """Write debugging output to sys.stderr.
3249         """
3250         if level <= self.debug:
3251             print(msg.format(*args), file=sys.stderr)
3252
3253     def __enter__(self):
3254         self._check()
3255         return self
3256
3257     def __exit__(self, type, value, traceback):
3258         if type is None:
3259             self.close()
3260         else:
3261             # An exception occurred. We must not call close() because
3262             # it would try to write end-of-archive blocks and padding.
3263             if not self._extfileobj:
3264                 self.fileobj.close()
3265             self.closed = True
3266 # class TarFile
3267
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Set up iteration over tarfile, starting with its first member.
        """
        self.index = 0
        self.tarfile = tarfile

    def __iter__(self):
        """The iterator is its own iterator object.
        """
        return self

    def __next__(self):
        """Return the member at the current position and advance. Members
           not yet known to the TarFile are fetched through its next()
           method; when that runs dry the TarFile is flagged as _loaded
           and the iteration stops.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        archive = self.tarfile

        if self.index == 0 and archive.firstmember is not None:
            member = archive.next()
        elif self.index < len(archive.members):
            member = archive.members[self.index]
        elif archive._loaded:
            raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration

        self.index += 1
        return member
3306
3307 #---------------------------------------------------------
3308 # support functionality for rescue mode
3309 #---------------------------------------------------------
3310
def read_tarobj_at_offset (fileobj, offset, mode, secret=None):
    """
    Open the archive in *fileobj* at *offset* in rescue tolerance mode and
    return the result of a single ``next ()`` call on it.

    *secret* is a pair whose first item is the kind of secret
    (``crypto.PDTCRYPT_SECRET_PW`` or ``crypto.PDTCRYPT_SECRET_KEY``) and
    whose second item is the password or the hex encoded key, respectively.

    Raises *RuntimeError* if no secret is given or if its kind is not one
    of the two above.
    """
    # A secret is mandatory here even though the parameter has a default;
    # fail with a clear message instead of a TypeError from indexing None.
    if secret is None:
        raise RuntimeError ("no secret given; need a password or a key")

    ks = secret [0]

    if ks == crypto.PDTCRYPT_SECRET_PW:
        decr = crypto.Decrypt (password=secret [1])
    elif ks == crypto.PDTCRYPT_SECRET_KEY:
        key = binascii.unhexlify (secret [1])
        decr = crypto.Decrypt (key=key)
    else:
        raise RuntimeError ("unsupported kind of secret: %r" % (ks,))

    tarobj = \
        TarFile.open_at_offset (offset,
                                mode=mode,
                                fileobj=fileobj,
                                format=GNU_FORMAT,
                                concat='#' in mode,
                                encryption=decr,
                                save_to_members=False,
                                tolerance=TOLERANCE_RESCUE)

    return tarobj.next ()
3334
3335
def idxent_of_tarinfo (tarinfo):
    """
    Build an index entry (a dict) from the fields of a *TarInfo* object.

    Taken over from *tarinfo*: ``uid``, ``gid``, ``mode``, ``mtime``,
    ``size`` and ``type``; ``path`` is its ``name`` and ``volume`` its
    ``volume_offset``. A *TarInfo* has no counterpart for the remaining
    keys, so they get neutral values: ``inode`` and ``offset`` are 0 (the
    caller is expected to fill in the offset) and ``ctime`` repeats the
    mtime.
    """
    entry = {}
    entry ["inode"]  = 0            # ignored when reading the index
    entry ["uid"]    = tarinfo.uid
    entry ["gid"]    = tarinfo.gid
    entry ["path"]   = tarinfo.name # keeping URI scheme
    entry ["offset"] = 0            # to be added by the caller
    entry ["volume"] = tarinfo.volume_offset
    entry ["mode"]   = tarinfo.mode
    entry ["ctime"]  = tarinfo.mtime
    entry ["mtime"]  = tarinfo.mtime
    entry ["size"]   = tarinfo.size
    entry ["type"]   = tarinfo.type
    return entry
3371
3372
def gen_rescue_index (backup_tar_path, mode, password=None, key=None):
    """
    Create a pseudo index for the archive at *backup_tar_path* without
    relying on an index file.

    The offsets of the objects are obtained from
    ``crypto.reconstruct_offsets ()`` using the secret built from
    *password* or, if no password is given, *key*. At each offset one
    object is read with ``read_tarobj_at_offset ()`` and converted with
    ``idxent_of_tarinfo ()``; the ``offset`` field of the entry is set to
    the offset the object was read from. *mode* is the open mode without
    the leading ``r``.

    Returns the list of entries; the list is empty if neither *password*
    nor *key* is given.
    """
    mode = "r" + mode

    if password is not None:
        secret = (crypto.PDTCRYPT_SECRET_PW, password)
    elif key is not None:
        secret = (crypto.PDTCRYPT_SECRET_KEY, key)
    else:
        # no secret, no offsets to reconstruct: the index stays empty
        return []

    offsets = crypto.reconstruct_offsets (backup_tar_path, secret)

    # the archive is only needed while the headers are being read; the
    # with block makes sure it is closed again, also on errors
    with bltn_open (backup_tar_path, "rb") as fileobj:
        infos = [ (off, read_tarobj_at_offset (fileobj, off, mode, secret=secret))
                  for off in offsets ]

    psidx = [] # pseudo index, return value
    for off, ti in infos:
        ie = idxent_of_tarinfo (ti)
        ie ["offset"] = off
        psidx.append (ie)

    return psidx
3396
3397 #--------------------
3398 # exported functions
3399 #--------------------
def is_tarfile(name):
    """Tell whether name can be opened as a tar archive: True if opening
       and closing it succeeds, False if that raises TarError.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
3410
# Keep the open() that was in effect up to this point reachable as
# bltn_open (makefile() uses it to create the extracted file), then
# rebind the module-level name open to TarFile.open.
bltn_open = open
open = TarFile.open