2 #-------------------------------------------------------------------
4 #-------------------------------------------------------------------
5 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
8 # Permission is hereby granted, free of charge, to any person
9 # obtaining a copy of this software and associated documentation
10 # files (the "Software"), to deal in the Software without
11 # restriction, including without limitation the rights to use,
12 # copy, modify, merge, publish, distribute, sublicense, and/or sell
13 # copies of the Software, and to permit persons to whom the
14 # Software is furnished to do so, subject to the following
17 # The above copyright notice and this permission notice shall be
18 # included in all copies or substantial portions of the Software.
20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27 # OTHER DEALINGS IN THE SOFTWARE.
29 """Read from and write to tar format archives.
32 __version__ = "$Revision: 85213 $"
36 __author__ = "Lars Gustäbel (lars@gustaebel.de)"
39 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend, Eduardo Robles."
57 import traceback # XXX
66 # os.symlink on Windows prior to 6.0 raises NotImplementedError
67 symlink_exception = (AttributeError, NotImplementedError)
69 # OSError (winerror=1314) will be raised if the caller does not hold the
70 # SeCreateSymbolicLinkPrivilege privilege
71 symlink_exception += (OSError,)
75 # from tarfile import *
76 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
78 from builtins import open as _open # Since 'open' is TarFile.open
80 #---------------------------------------------------------
82 #---------------------------------------------------------
83 NUL = b"\0" # the null character
84 BLOCKSIZE = 512 # length of processing blocks
85 RECORDSIZE = BLOCKSIZE * 20 # length of records
86 GNU_MAGIC = b"ustar \0" # magic gnu tar string
87 POSIX_MAGIC = b"ustar\x0000" # magic posix tar string
89 LENGTH_NAME = 100 # maximum length of a filename
90 LENGTH_LINK = 100 # maximum length of a linkname
91 LENGTH_PREFIX = 155 # maximum length of the prefix field
93 REGTYPE = b"0" # regular file
94 AREGTYPE = b"\0" # regular file
95 LNKTYPE = b"1" # link (inside tarfile)
96 SYMTYPE = b"2" # symbolic link
97 CHRTYPE = b"3" # character special device
98 BLKTYPE = b"4" # block special device
99 DIRTYPE = b"5" # directory
100 FIFOTYPE = b"6" # fifo special device
101 CONTTYPE = b"7" # contiguous file
103 GNUTYPE_LONGNAME = b"L" # GNU tar longname
104 GNUTYPE_LONGLINK = b"K" # GNU tar longlink
105 GNUTYPE_SPARSE = b"S" # GNU tar sparse file
106 GNUTYPE_MULTIVOL = b"M" # GNU tar continuation of a file that began on
109 XHDTYPE = b"x" # POSIX.1-2001 extended header
110 XGLTYPE = b"g" # POSIX.1-2001 global header
111 SOLARIS_XHDTYPE = b"X" # Solaris extended header
113 USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
114 GNU_FORMAT = 1 # GNU tar format
115 PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
116 DEFAULT_FORMAT = GNU_FORMAT
118 GZ_FMT_HEADER = b"<BBBBLBB"
119 GZ_HEADER_SIZE = 10 # not including the name
120 GZ_MAGIC = (0x1f, 0x8b) # 0o37, 0o213
121 GZ_METHOD_DEFLATE = 0x08 # 0o10
122 GZ_FLAG_ORIG_NAME = 0x08 # 0o10, default in gzip
123 GZ_DEFLATE_FLAGS = 0x00 # 0o00, never read (deflate.c)
124 GZ_OS_CODE = 0x03 # 0o03, default in gzip (tailor.h)
125 GZ_MAGIC_BYTES = struct.pack ("<BB", GZ_MAGIC [0], GZ_MAGIC [1])
126 GZ_MAGIC_DEFLATE = struct.pack ("<BBB", GZ_MAGIC [0], GZ_MAGIC [1],
130 TOLERANCE_RECOVER = 1 # rely on offsets in index
131 TOLERANCE_RESCUE = 2 # deduce metadata from archive contents
133 #---------------------------------------------------------
134 # archive handling mode
135 #---------------------------------------------------------
138 ARCMODE_ENCRYPT = 1 << 0
139 ARCMODE_COMPRESS = 1 << 1
140 ARCMODE_CONCAT = 1 << 2
143 if m == ARCMODE_PLAIN:
147 def chkappend (b, s):
152 if first is True: first = False
155 chkappend (ARCMODE_ENCRYPT, "ENCRYPT")
156 chkappend (ARCMODE_COMPRESS, "COMPRESS")
157 chkappend (ARCMODE_CONCAT, "CONCAT")
def arcmode_set (concat=False, encryption=None, comptype=None, init=ARCMODE_PLAIN):
    """
    Build the archive-mode bitmask from the ctor-style options.

    Starts from *init* and ORs in CONCAT, ENCRYPT and COMPRESS bits as
    implied by the arguments.
    """
    ret = init
    if bool (concat) is True:
        ret |= ARCMODE_CONCAT
    if encryption is not None:
        ret |= ARCMODE_ENCRYPT
    # only gzip participates in the compress arcmode; bz2/xz are handled
    # as plain stream compressors elsewhere
    if comptype == "gz":
        ret |= ARCMODE_COMPRESS
    return ret
171 #---------------------------------------------------------
173 #---------------------------------------------------------
174 # File types that tarfile supports:
175 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
176 SYMTYPE, DIRTYPE, FIFOTYPE,
177 CONTTYPE, CHRTYPE, BLKTYPE,
178 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
179 GNUTYPE_SPARSE, GNUTYPE_MULTIVOL)
181 # File types that will be treated as a regular file.
182 REGULAR_TYPES = (REGTYPE, AREGTYPE,
183 CONTTYPE, GNUTYPE_SPARSE)
185 # File types that are part of the GNU tar format.
186 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
187 GNUTYPE_SPARSE, GNUTYPE_MULTIVOL)
189 # Fields from a pax header that override a TarInfo attribute.
190 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
191 "uid", "gid", "uname", "gname")
193 # Fields from a pax header that are affected by hdrcharset.
194 PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
196 # Fields in a pax header that are numbers, all other fields
197 # are treated as strings.
198 PAX_NUMBER_FIELDS = {
207 #---------------------------------------------------------
209 #---------------------------------------------------------
211 if os.name in ("nt", "ce"):
214 ENCODING = sys.getfilesystemencoding()
216 #---------------------------------------------------------
217 # Some useful functions
218 #---------------------------------------------------------
def stn(s, length, encoding, errors):
    """Convert a string to a null-terminated bytes object.

    The string is encoded with *encoding*/*errors*, truncated to at most
    *length* bytes and NUL-padded to exactly *length* bytes.
    """
    s = s.encode(encoding, errors)
    return s[:length] + (length - len(s)) * NUL
def nts(s, encoding, errors):
    """Convert a null-terminated bytes object to a string.

    Everything from the first NUL byte on is discarded before decoding
    with *encoding*/*errors*.
    """
    p = s.find(b"\0")
    if p != -1:
        s = s[:p]
    return s.decode(encoding, errors)
def sbtn(s, length, encoding, errors):
    """Convert a string or a bunch of bytes to a null-terminated bytes object
    of a specific length.

    str input is encoded first; bytes input is used as-is. The result is
    truncated to at most *length* bytes and NUL-padded to exactly *length*.
    """
    if isinstance(s, str):
        s = s.encode(encoding, errors)
    return s[:length] + (length - len(s)) * NUL
243 """Convert a number field to a python number.
245 # There are two possible encodings for a number field, see
247 if s[0] in (0o200, 0o377):
249 for i in range(len(s) - 1):
253 n = -(256 ** (len(s) - 1) - n)
256 n = int(nts(s, "ascii", "strict") or "0", 8)
258 raise InvalidHeaderError("invalid header")
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, int(n)), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            n = 256 ** digits + n

        # emit digits-1 base-256 bytes, least significant inserted last
        for i in range(digits - 1):
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.
    """
    # 148 bytes before the chksum field, 8 skipped chksum bytes counted
    # as 8 spaces (8 * 32 == 256), then the remaining 356 bytes.
    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

    Raises OSError when src is exhausted before *length* bytes were read.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024
    blocks, remainder = divmod(length, BUFSIZE)
    for b in range(blocks):
        buf = src.read(BUFSIZE)
        dst.write(buf)
        if len(buf) < BUFSIZE:
            raise OSError("end of file reached")

    if remainder != 0:
        buf = src.read(remainder)
        dst.write(buf)
        if len(buf) < remainder:
            raise OSError("end of file reached")
327 """Deprecated in this location; use stat.filemode."""
329 warnings.warn("deprecated in favor of stat.filemode",
330 DeprecationWarning, 2)
331 return stat.filemode(mode)
class TarError(Exception):
    """Base class for all exceptions raised by this tar implementation."""
class ExtractError(TarError):
    """Raised for general errors while extracting an archive member."""
class ReadError(TarError):
    """Raised when a tar archive cannot be read or parsed."""
class CompressionError(TarError):
    """Raised when a compression method is unavailable or data is bad."""
class StreamError(TarError):
    """Raised for operations that stream-like TarFiles do not support."""
class HeaderError(TarError):
    """Base class for all member-header related errors."""
class EmptyHeaderError(HeaderError):
    """Raised when an all-empty header block is encountered."""
class TruncatedHeaderError(HeaderError):
    """Raised when a header block is shorter than expected."""
class EOFHeaderError(HeaderError):
    """Raised when an end-of-file marker header is encountered."""
class InvalidHeaderError(HeaderError):
    """Raised when a header block cannot be parsed as a valid header."""
class SubsequentHeaderError(HeaderError):
    """Raised for missing or invalid extended (follow-up) headers."""
class InvalidEncryptionError(TarError):
    """Raised for undefined crypto modes and invalid mode combinations."""
class DecryptionError(TarError):
    """Raised when an error occurs during decryption."""
class EncryptionError(TarError):
    """Raised when an error occurs during encryption."""
class EndOfFile(Exception):
    """Signal an end-of-file condition that is not an error."""
378 #---------------------------
379 # internal stream interface
380 #---------------------------
382 """Low-level file object. Supports reading and writing.
383 It is used instead of a regular file object for streaming
387 def __init__(self, name, mode):
390 "w": os.O_RDWR | os.O_CREAT | os.O_TRUNC,
392 if hasattr(os, "O_BINARY"):
393 _mode |= os.O_BINARY # pylint: disable=no-member
394 self.fd = os.open(name, _mode, 0o666)
400 def read(self, size):
401 ret = os.read(self.fd, size)
402 self.offset += len(ret)
405 def write(self, s, pos=None):
408 os.lseek (self.fd, pos, os.SEEK_SET)
409 n = os.write(self.fd, s)
411 self.offset += len(s)
413 append = pos + n - p0
415 self.offset += append
416 os.lseek (self.fd, p0, os.SEEK_SET)
421 def seek_set (self, pos):
422 os.lseek (self.fd, pos, os.SEEK_SET)
def gz_header (name=None):
    """
    Assemble a gzip member header (RFC 1952).

    If *name* is given it is stored in the FNAME field; the helper
    suffixes checked below (".pdtcrypt", ".gz") are stripped first.
    """
    timestamp = int(time.time())
    flags = 0x0

    if name is None:
        name = b""
    else:
        flags |= GZ_FLAG_ORIG_NAME
        if type(name) is str:
            name = name.encode("iso-8859-1", "replace")
        if name.endswith(b".pdtcrypt"):
            name = name[:-9]
        if name.endswith(b".gz"):
            name = name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        name += NUL

    hdr = struct.pack (GZ_FMT_HEADER,
                       GZ_MAGIC [0], GZ_MAGIC [1],
                       GZ_METHOD_DEFLATE, flags,
                       timestamp, GZ_DEFLATE_FLAGS, GZ_OS_CODE)

    return hdr + name
453 """Class that serves as an adapter between TarFile and
454 a stream-like object. The stream-like object only
455 needs to have a read() or write() method and is accessed
456 blockwise. Use of gzip or bzip2 compression is possible.
457 A stream-like object could be for example: sys.stdin,
458 sys.stdout, a socket, a tape device etc.
460 _Stream is intended to be used only internally but is
461 nevertherless used externally by Deltatar.
463 When encrypting, the ``enccounter`` will be used for
464 initializing the first cryptographic context. When
465 decrypting, its value will be compared to the decrypted
466 object. Decryption fails if the value does not match.
467 In effect, this means that a ``_Stream`` whose ctor was
468 passed ``enccounter`` can only be used to encrypt or
469 decrypt a single object.
472 remainder = -1 # track size in encrypted entries
473 tolerance = TOLERANCE_STRICT
475 def __init__(self, name, mode, comptype, fileobj, bufsize,
476 concat=False, encryption=None, enccounter=None,
477 compresslevel=9, tolerance=TOLERANCE_STRICT):
478 """Construct a _Stream object.
480 self.arcmode = arcmode_set (concat, encryption, comptype)
481 self.tolerance = tolerance
483 self._extfileobj = True
485 fileobj = _LowLevelFile(name, mode)
486 self._extfileobj = False
489 # Enable transparent compression detection for the
491 fileobj = _StreamProxy(fileobj)
492 comptype = fileobj.getcomptype()
496 self.enccounter = None
497 if self.arcmode & ARCMODE_ENCRYPT:
498 self.enccounter = enccounter
500 self.name = name or ""
502 self.comptype = comptype
504 self.fileobj = fileobj
505 self.bufsize = bufsize
511 self.last_block_offset = 0
512 self.dbuf = b"" # ???
513 self.exception = None # communicate decompression failure
514 self.compresslevel = compresslevel
515 self.bytes_written = 0
517 self.encryption = encryption
525 raise CompressionError("zlib module is not available")
528 self.exception = zlib.error
531 if not (self.arcmode & ARCMODE_CONCAT):
532 if self.arcmode & ARCMODE_ENCRYPT:
533 self._init_write_encrypt (name)
534 self._init_write_gz ()
535 self.crc = zlib.crc32(b"") & 0xFFFFffff
537 elif comptype == "bz2":
538 if self.arcmode & ARCMODE_ENCRYPT:
539 raise InvalidEncryptionError("encryption not available for "
540 "compression “%s”" % comptype)
544 raise CompressionError("bz2 module is not available")
547 self.cmp = bz2.BZ2Decompressor()
548 self.exception = OSError
550 self.cmp = bz2.BZ2Compressor()
552 elif comptype == 'xz':
553 if self.arcmode & ARCMODE_ENCRYPT:
554 raise InvalidEncryptionError("encryption not available for "
555 "compression “%s”" % comptype)
559 raise CompressionError("lzma module is not available")
562 self.cmp = lzma.LZMADecompressor()
563 self.exception = lzma.LZMAError
565 self.cmp = lzma.LZMACompressor()
567 elif comptype == "tar":
568 if not (self.arcmode & ARCMODE_CONCAT) \
570 and self.arcmode & ARCMODE_ENCRYPT:
571 self._init_write_encrypt (name)
574 if self.arcmode & ARCMODE_ENCRYPT:
575 raise InvalidEncryptionError("encryption not available for "
576 "compression “%s”" % comptype)
577 raise CompressionError("unknown compression type %r" % comptype)
580 if not self._extfileobj:
586 if hasattr(self, "closed") and not self.closed:
589 except crypto.InternalError:
590 # context already finalized due to abort but close() tried
595 def next (self, name):
596 if self.arcmode & ARCMODE_COMPRESS:
597 if getattr (self, "cmp", None) is not None:
598 self._finalize_write_gz ()
600 if self.arcmode & ~(ARCMODE_ENCRYPT | ARCMODE_COMPRESS):
601 self.last_block_offset = self.fileobj.tell()
602 if self.arcmode & ARCMODE_ENCRYPT:
603 self._finalize_write_encrypt ()
604 self._init_write_encrypt (name, set_last_block_offset=True)
605 if self.arcmode & ARCMODE_COMPRESS:
606 self._init_write_gz (set_last_block_offset =
607 not (self.arcmode & ARCMODE_ENCRYPT))
608 return self.last_block_offset
611 def next_volume (self, name):
612 # with non-concat modes, this is taken care by the _Stream
613 # ctor as invoked by the newvol handler
614 if self.arcmode & ARCMODE_COMPRESS:
615 if getattr (self, "cmp", None) is not None:
616 # e. g. compressed PAX header written
617 self._finalize_write_gz ()
618 if self.arcmode & ARCMODE_ENCRYPT:
619 self._init_write_encrypt (name)
620 if self.arcmode & ARCMODE_COMPRESS:
621 self._init_write_gz ()
624 def _init_write_encrypt (self, entry=None, set_last_block_offset=False):
626 Save position for delayed write of header; fill the header location
629 # first thing, proclaim new object to the encryption context
630 # secondly, assemble the header with the updated parameters
631 # and commit it directly to the underlying stream, bypassing the
632 # encryption layer in .__write().
633 dummyhdr = self.encryption.next (entry, counter=self.enccounter)
635 raise EncryptionError ("Crypto.next(): bad dummy header") # XXX
636 self.lasthdr = self.fileobj.tell()
637 self.__write_to_file(dummyhdr)
638 if set_last_block_offset is True:
639 self.last_block_offset = self.lasthdr
642 def _finalize_write_encrypt (self):
644 Seek back to header position, read dummy bytes, finalize crypto
645 obtaining the actual header, write header, seek back to current
648 Returns the list of IV fixed parts as used during encryption.
650 if self.lasthdr is not None:
651 pos0 = self.fileobj.tell ()
652 self.fileobj.seek_set (self.lasthdr)
653 dummy = self.fileobj.read (crypto.PDTCRYPT_HDR_SIZE)
654 pos1 = self.fileobj.tell ()
655 dpos = pos1 - self.lasthdr
656 assert dpos == crypto.PDTCRYPT_HDR_SIZE
657 self.fileobj.seek_set (pos0)
658 data, hdr, _ = self.encryption.done (dummy)
659 self.__write_to_file(hdr, pos=self.lasthdr)
660 self.__write_to_file(data) # append remainder of data
664 def _finalize_write_gz (self):
665 if self.cmp is not None:
666 chunk = self.buf + self.cmp.flush()
668 if self.comptype == "gz":
669 # The native zlib crc is an unsigned 32-bit integer, but
670 # the Python wrapper implicitly casts that to a signed C
671 # long. So, on a 32-bit box self.crc may "look negative",
672 # while the same crc on a 64-bit box may "look positive".
673 # To avoid irksome warnings from the `struct` module, force
674 # it to look positive on all boxes.
675 chunk += struct.pack("<L", self.crc & 0xffffffff)
676 chunk += struct.pack("<L", self.concat_pos & 0xffffFFFF)
677 self.__enc_write (chunk)
681 def _init_write_gz (self, set_last_block_offset=False):
683 Add a new gzip block, closing last one
686 self.crc = self.zlib.crc32(b"") & 0xFFFFffff
687 first = self.cmp is None
688 self.cmp = self.zlib.compressobj(self.compresslevel,
690 -self.zlib.MAX_WBITS,
691 self.zlib.DEF_MEM_LEVEL,
694 # if aes, we encrypt after compression
695 if set_last_block_offset is True:
696 self.last_block_offset = self.fileobj.tell()
698 self.__write(gz_header (self.name if first is True else None))
702 """Write string s to the stream.
704 if self.comptype == "gz":
705 self.crc = self.zlib.crc32(s, self.crc) & 0xFFFFffff
707 self.concat_pos += len(s)
708 if self.cmp is not None:
709 s = self.cmp.compress(s)
713 """Write what’s left in the buffer to the stream."""
714 self.__write (b"") # → len (buf) <= bufsiz
715 self.__enc_write (self.buf)
718 def __write(self, s):
719 """Writes (and encodes) string s to the stream blockwise
721 will wait with encoding/writing until block is complete
724 while len(self.buf) > self.bufsize:
725 self.__enc_write(self.buf[:self.bufsize])
726 self.buf = self.buf[self.bufsize:]
729 def __write_to_file(self, s, pos=None):
731 Writes directly to the fileobj; updates self.bytes_written. If “pos” is
732 given, the stream will seek to that position first and back afterwards,
733 and the total of bytes written is not updated.
735 self.fileobj.write(s, pos)
737 self.bytes_written += len(s)
740 def __enc_write(self, s):
742 If encryption is active, the string s is encrypted before being written
747 if self.arcmode & ARCMODE_ENCRYPT:
750 n, ct = self.encryption.process(buf)
751 self.__write_to_file(ct)
754 # The entire plaintext was not consumed: The size limit
755 # for encrypted objects was reached. Transparently create
756 # a new encrypted object and continue processing the input.
757 self._finalize_write_encrypt ()
758 self._init_write_encrypt ()
760 self.__write_to_file(s)
763 def estim_file_size(self):
764 """ estimates size of file if closing it now
766 The result may differ greatly from the amount of data sent to write()
767 due to compression, encryption and buffering.
769 In tests the result (before calling close()) was up to 12k smaller than
770 the final file size if compression is being used because zlib/bz2
771 compressors do not allow inspection of their buffered data :-(
773 Still, we add what close() would add: 8 bytes for gz checksum, one
774 encryption block size if encryption is used and the size of our own
778 return self.bytes_written
780 result = self.bytes_written
782 result += len(self.buf)
783 if self.comptype == 'gz':
784 result += 8 # 2 longs = 8 byte (no extra info written for bzip2)
787 def close(self, close_fileobj=True):
788 """Close the _Stream object. No operation should be
789 done on it afterwards.
795 if close_fileobj is True:
798 if self.arcmode & ARCMODE_COMPRESS:
799 self._finalize_write_gz ()
800 # end of Tar archive marker (two empty blocks) was written
801 # finalize encryption last; no writes may be performed after
804 if self.arcmode & ARCMODE_ENCRYPT:
805 self._finalize_write_encrypt ()
807 if not self._extfileobj:
810 # read the zlib crc and length and check them
811 if self.mode == "r" and self.comptype == "gz":
812 read_crc = self.__read(4)
813 read_length = self.__read(4)
814 calculated_crc = self.crc
815 if struct.unpack("<L", read_crc)[0] != calculated_crc:
816 raise CompressionError("bad gzip crc")
820 def _init_read_gz(self):
821 """Initialize for reading a gzip compressed fileobj.
823 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
825 read2 = self.__read(2)
827 raise EndOfFile ("_init_read_gz(): read returned zero bytes at pos "
828 "%d" % self.fileobj.tell())
829 # taken from gzip.GzipFile with some alterations
830 if read2 != GZ_MAGIC_BYTES:
831 raise ReadError("not a gzip file")
833 read1 = self.__read(1)
835 raise CompressionError("unsupported compression method")
837 self.flags = flag = ord(self.__read(1))
841 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
846 if not s or s == NUL:
851 if not s or s == NUL:
856 def _init_read_encrypt (self):
857 """Initialize encryption for next entry in archive. Read a header and
858 notify the crypto context."""
859 if self.arcmode & ARCMODE_ENCRYPT:
860 lasthdr = self.fileobj.tell ()
862 hdr = crypto.hdr_read_stream (self.fileobj)
863 except crypto.EndOfFile:
865 except crypto.InvalidHeader as exn:
866 raise DecryptionError ("Crypto.hdr_read_stream(): error “%s” "
867 "processing %r at pos %d"
868 % (exn, self.fileobj, lasthdr)) \
870 if self.enccounter is not None:
871 # enforce that the iv counter in the header matches an
872 # explicitly requested one
873 iv = crypto.hdr_iv_counter (hdr)
874 if iv != self.enccounter:
875 raise DecryptionError ("expected IV counter %d, got %d"
876 % (self.enccounter, iv))
877 self.lasthdr = lasthdr
878 self.remainder = hdr ["ctsize"] # distance to next header
880 self.encryption.next (hdr)
881 except crypto.InvalidParameter as exn:
882 raise DecryptionError ("Crypto.next(): error “%s” "
883 "processing %r at pos %d"
884 % (exn, self.fileobj, lasthdr)) \
890 def _read_encrypt (self, buf):
892 Demote a program error to a decryption error in tolerant mode. This
893 allows recovery from corrupted headers and invalid data.
896 return self.encryption.process (buf)
897 except RuntimeError as exn:
898 if self.tolerance != TOLERANCE_STRICT:
899 raise DecryptionError (exn)
903 def _finalize_read_encrypt (self):
907 if self.arcmode & ARCMODE_ENCRYPT \
908 and self.lasthdr is not None :
909 assert self.remainder >= 0
910 if self.remainder > 0:
913 data = self.encryption.done ()
914 except crypto.InvalidGCMTag as exn:
915 raise DecryptionError ("decryption failed: %s" % exn)
920 """Return the stream's file pointer position.
924 def seek(self, pos=0):
925 """Set the stream's file pointer to pos. Negative seeking
928 if pos - self.pos >= 0:
929 blocks, remainder = divmod(pos - self.pos, self.bufsize)
930 for i in range(blocks):
931 self.read(self.bufsize)
934 raise StreamError("seeking backwards is not allowed")
937 def read(self, size=None):
938 """Return the next size number of bytes from the stream.
939 If size is not defined, return all bytes of the stream
945 buf = self._read(self.bufsize)
951 buf = self._read(size)
956 """Reads just one line, new line character included
958 # if \n in dbuf, no read neads to be done
959 if b'\n' in self.dbuf:
960 pos = self.dbuf.index(b'\n') + 1
961 ret = self.dbuf[:pos]
962 self.dbuf = self.dbuf[pos:]
967 chunk = self._read(self.bufsize)
969 # nothing more to read, so return the buffer
975 # if \n found, return the new line
978 pos = dbuf.index(b'\n') + 1
979 self.dbuf = dbuf[pos:] + self.dbuf
982 def _read(self, size):
983 """Return size bytes from the stream.
989 buf = self.__read(self.bufsize)
993 if self.cmp is not None:
995 buf = self.cmp.decompress(buf)
996 except self.exception as exn:
997 raise ReadError("invalid compressed data (%r)" % exn)
998 except Exception as e:
999 # happens at the end of the file
1000 # _init_read_gz failed in the previous iteration so
1001 # self.cmp.decompress fails here
1002 if self.arcmode & ARCMODE_CONCAT:
1005 raise ReadError("invalid compressed data")
1006 if self.arcmode & ARCMODE_COMPRESS and hasattr(self, "crc"):
1007 self.crc = self.zlib.crc32(buf, self.crc) & 0xFFFFffff
1008 if self.arcmode & ARCMODE_CONCAT \
1009 and len(self.cmp.unused_data) != 0:
1010 self.buf = self.cmp.unused_data + self.buf
1011 self.close(close_fileobj=False)
1013 self._init_read_gz()
1014 except DecryptionError:
1015 if self.tolerance != TOLERANCE_STRICT:
1016 # return whatever data was processed successfully
1023 # happens at the end of the file
1025 self.crc = self.zlib.crc32(b"") & 0xFFFFffff
1030 self.dbuf = t[size:]
1034 def __read(self, size):
1036 Return size bytes from stream. If internal buffer is empty, read
1037 another block from the stream.
1039 The function returns up to size bytes of data. When an error occurs
1040 during decryption, everything until the end of the last successfully
1041 finalized object is returned.
1044 t = [self.buf] if c > 0 else []
1045 good_crypto = len (t)
1050 if self.arcmode & ARCMODE_ENCRYPT:
1051 if self.remainder <= 0:
1052 # prepare next object
1053 if self._init_read_encrypt () is False: # EOF
1057 # only read up to the end of the encrypted object
1058 todo = min (size, self.remainder)
1059 buf = self.fileobj.read(todo)
1060 if self.arcmode & ARCMODE_ENCRYPT:
1062 buf = self._read_encrypt (buf)
1063 if todo == self.remainder:
1064 # at the end of a crypto object; finalization will fail if
1065 # the GCM tag does not match
1066 trailing = self._finalize_read_encrypt ()
1067 good_crypto = len (t) + 1
1068 if len (trailing) > 0:
1072 self.remainder -= todo
1073 except DecryptionError:
1074 if self.tolerance == TOLERANCE_STRICT:
1076 self.encryption.drop ()
1077 if good_crypto == 0:
1079 # this may occur at any of the three crypto operations above.
1080 # some objects did validate; discard all data after it; next
1081 # call will start with the bad object and error out immediately
1082 self.buf = b"".join (t [good_crypto:])
1083 return b"".join (t [:good_crypto])
1085 if not buf: ## XXX stream terminated prematurely; this should be an error
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').

    Buffers the first block so the magic bytes can be inspected, then
    hands reading over to the underlying file object.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size): # pylint: disable=method-hidden
        # first call returns the sniffed block; later calls go straight
        # to the wrapped object (the method rebinds itself)
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Guess the compression type from the buffered magic bytes."""
        if self.buf.startswith(GZ_MAGIC_DEFLATE):
            return "gz"
        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
            return "bz2"
        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        else:
            return "tar"

    def close(self):
        self.fileobj.close()
1123 #------------------------
1124 # Extraction file object
1125 #------------------------
1126 class _FileInFile(object):
1127 """A thin wrapper around an existing file object that
1128 provides a part of its data as an individual file
1132 def __init__(self, fileobj, offset, size, blockinfo=None):
1133 self.fileobj = fileobj
1134 self.offset = offset
1137 self.name = getattr(fileobj, "name", None)
1140 if blockinfo is None:
1141 blockinfo = [(0, size)]
1143 # Construct a map with data and zero blocks.
1147 realpos = self.offset
1148 for offset, size in blockinfo:
1149 if offset > lastpos:
1150 self.map.append((False, lastpos, offset, None))
1151 self.map.append((True, offset, offset + size, realpos))
1153 lastpos = offset + size
1154 if lastpos < self.size:
1155 self.map.append((False, lastpos, self.size, None))
1167 return self.fileobj.seekable()
1170 """Return the current file position.
1172 return self.position
1174 def seek(self, position, whence=io.SEEK_SET):
1175 """Seek to a position in the file.
1177 if whence == io.SEEK_SET:
1178 self.position = min(max(position, 0), self.size)
1179 elif whence == io.SEEK_CUR:
1181 self.position = max(self.position + position, 0)
1183 self.position = min(self.position + position, self.size)
1184 elif whence == io.SEEK_END:
1185 self.position = max(min(self.size + position, self.size), 0)
1187 raise ValueError("Invalid argument")
1188 return self.position
1190 def read(self, size=None):
1191 """Read data from the file.
1194 size = self.size - self.position
1196 size = min(size, self.size - self.position)
1201 data, start, stop, offset = self.map[self.map_index]
1202 if start <= self.position < stop:
1206 if self.map_index == len(self.map):
1208 length = min(size, stop - self.position)
1210 self.fileobj.seek(offset + (self.position - start))
1211 buf += self.fileobj.read(length)
1215 self.position += length
1218 def readinto(self, b):
1219 buf = self.read(len(b))
class ExFileObject(io.BufferedReader):
    """Buffered file-like object over one member's data region, as
    handed out by TarFile.extractfile()."""

    def __init__(self, tarfile, tarinfo):
        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                              tarinfo.size, tarinfo.sparse)
        super().__init__(fileobj)
1239 class TarInfo(object):
1240 """Informational class which holds the details about an
1241 archive member given by a tar header block.
1242 TarInfo objects are returned by TarFile.getmember(),
1243 TarFile.getmembers() and TarFile.gettarinfo() and are
1244 usually created internally.
1247 __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
1248 "chksum", "type", "linkname", "uname", "gname",
1249 "devmajor", "devminor", "volume_offset",
1250 "offset", "offset_data", "pax_headers", "sparse",
1251 "tarfile", "_sparse_structs", "_link_target")
1253 def __init__(self, name=""):
1254 """Construct a TarInfo object. name is the optional name
1257 self.name = name # member name
1258 self.mode = 0o644 # file permissions
1259 self.uid = 0 # user id
1260 self.gid = 0 # group id
1261 self.size = 0 # file size
1262 self.mtime = 0 # modification time
1263 self.chksum = 0 # header checksum
1264 self.type = REGTYPE # member type
1265 self.linkname = "" # link name
1266 self.uname = "" # user name
1267 self.gname = "" # group name
1268 self.devmajor = 0 # device major number
1269 self.devminor = 0 # device minor number
1271 self.offset = 0 # the tar header starts here
1272 self.offset_data = 0 # the file's data starts here
1273 self.volume_offset = 0 # the file's data corresponds with the data
1274 # starting at this position
1276 self.sparse = None # sparse member information
1277 self.pax_headers = {} # pax header information
1279 # In pax headers the "name" and "linkname" field are called
1280 # "path" and "linkpath".
1283 def _setpath(self, name):
1285 path = property(_getpath, _setpath)
1287 def _getlinkpath(self):
1288 return self.linkname
1289 def _setlinkpath(self, linkname):
1290 self.linkname = linkname
1291 linkpath = property(_getlinkpath, _setlinkpath)
1294 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
1296 def get_info(self, encoding=None, errors=None):
1297 """Return the TarInfo's attributes as a dictionary.
1301 "mode": self.mode & 0o7777,
1305 "mtime": self.mtime,
1306 "chksum": self.chksum,
1308 "linkname": self.linkname,
1309 "uname": self.uname,
1310 "gname": self.gname,
1311 "devmajor": self.devmajor,
1312 "devminor": self.devminor,
1313 "offset_data": self.offset_data,
1314 "volume_offset": self.volume_offset
1317 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING,
              errors="surrogateescape"):
        """Return a tar header as a string of 512 byte blocks.
        """
        info = self.get_info(encoding, errors)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding, errors)
        # No recognized *_FORMAT constant matched.
        raise ValueError("invalid format")
    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.
        """
        info["magic"] = POSIX_MAGIC

        # ustar has no way to represent an overlong link name.
        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        # Overlong member names are split across the prefix/name fields.
        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT, encoding, errors)
1350 def create_gnu_header(self, info, encoding, errors):
1351 """Return the object as a GNU header block sequence.
1353 info["magic"] = GNU_MAGIC
1355 if self.ismultivol():
1357 itn(info.get("atime", 0), 12, GNU_FORMAT),
1358 itn(info.get("ctime", 0), 12, GNU_FORMAT),
1359 itn(self.volume_offset, 12, GNU_FORMAT),
1360 itn(0, 119, GNU_FORMAT), # stuff unused in this tar implementation, set to zero
1362 info['prefix'] = b"".join(prefix)
1363 info['size'] = info['size'] - self.volume_offset
1366 if len(info["linkname"]) > LENGTH_LINK:
1367 buf += self._create_gnu_long_header(info["linkname"],
1368 GNUTYPE_LONGLINK, encoding, errors)
1370 if len(info["name"]) > LENGTH_NAME:
1371 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME,
1374 return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
1376 def create_pax_header(self, info, encoding, errors):
1377 """Return the object as a ustar header block. If it cannot be
1378 represented this way, prepend a pax extended header sequence
1379 with supplement information.
1381 info["magic"] = POSIX_MAGIC
1382 pax_headers = self.pax_headers.copy()
1383 if self.ismultivol():
1384 info['size'] = info['size'] - self.volume_offset
1386 # Test string fields for values that exceed the field length or cannot
1387 # be represented in ASCII encoding.
1388 for name, hname, length in (
1389 ("name", "path", LENGTH_NAME),
1390 ("linkname", "linkpath", LENGTH_LINK),
1391 ("uname", "uname", 32),
1392 ("gname", "gname", 32)):
1394 if hname in pax_headers:
1395 # The pax header has priority.
1398 # Try to encode the string as ASCII.
1400 info[name].encode("ascii", "strict")
1401 except UnicodeEncodeError:
1402 pax_headers[hname] = info[name]
1405 if len(info[name]) > length:
1406 pax_headers[hname] = info[name]
1408 # Test number fields for values that exceed the field limit or values
1409 # that like to be stored as float.
1410 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1411 if name in pax_headers:
1412 # The pax header has priority. Avoid overflow.
1417 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1418 pax_headers[name] = str(val)
1421 # Create a pax extended header if necessary.
1423 buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
1427 return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
1430 def create_pax_global_header(cls, pax_headers):
1431 """Return the object as a pax global header block sequence.
1433 return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
1435 def _posix_split_name(self, name):
1436 """Split a name longer than 100 chars into a prefix
1439 prefix = name[:LENGTH_PREFIX + 1]
1440 while prefix and prefix[-1] != "/":
1441 prefix = prefix[:-1]
1443 name = name[len(prefix):]
1444 prefix = prefix[:-1]
1446 if not prefix or len(name) > LENGTH_NAME:
1447 raise ValueError("name is too long")
1451 def _create_header(info, format, encoding, errors):
1452 """Return a header block. info is a dictionary with file
1453 information, format must be one of the *_FORMAT constants.
1456 stn(info.get("name", ""), 100, encoding, errors),
1457 itn(info.get("mode", 0) & 0o7777, 8, format),
1458 itn(info.get("uid", 0), 8, format),
1459 itn(info.get("gid", 0), 8, format),
1460 itn(info.get("size", 0), 12, format),
1461 itn(info.get("mtime", 0), 12, format),
1462 b" ", # checksum field
1463 info.get("type", REGTYPE),
1464 stn(info.get("linkname", ""), 100, encoding, errors),
1465 info.get("magic", POSIX_MAGIC),
1466 stn(info.get("uname", ""), 32, encoding, errors),
1467 stn(info.get("gname", ""), 32, encoding, errors),
1468 itn(info.get("devmajor", 0), 8, format),
1469 itn(info.get("devminor", 0), 8, format),
1470 sbtn(info.get("prefix", ""), 155, encoding, errors)
1473 buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
1474 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1475 buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
1479 def _create_payload(payload):
1480 """Return the string payload filled with zero bytes
1481 up to the next 512 byte border.
1483 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1485 payload += (BLOCKSIZE - remainder) * NUL
1489 def _create_gnu_long_header(cls, name, type, encoding, errors):
1490 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1493 name = name.encode(encoding, errors) + NUL
1496 info["name"] = "././@LongLink"
1498 info["size"] = len(name)
1499 info["magic"] = GNU_MAGIC
1501 # create extended header + name blocks.
1502 return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
1503 cls._create_payload(name)
1506 def _create_pax_generic_header(cls, pax_headers, type, encoding):
1507 """Return a POSIX.1-2008 extended or global header sequence
1508 that contains a list of keyword, value pairs. The values
1511 # Check if one of the fields contains surrogate characters and thereby
1512 # forces hdrcharset=BINARY, see _proc_pax() for more information.
1514 for keyword, value in pax_headers.items():
1516 value.encode("utf-8", "strict")
1517 except UnicodeEncodeError:
1523 # Put the hdrcharset field at the beginning of the header.
1524 records += b"21 hdrcharset=BINARY\n"
1526 for keyword, value in pax_headers.items():
1527 keyword = keyword.encode("utf-8")
1529 # Try to restore the original byte representation of `value'.
1530 # Needless to say, that the encoding must match the string.
1531 value = value.encode(encoding, "surrogateescape")
1533 value = value.encode("utf-8")
1535 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1542 records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
1544 # We use a hardcoded "././@PaxHeader" name like star does
1545 # instead of the one that POSIX recommends.
1547 info["name"] = "././@PaxHeader"
1549 info["size"] = len(records)
1550 info["magic"] = POSIX_MAGIC
1552 # Create pax header + record blocks.
1553 return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
1554 cls._create_payload(records)
1557 def frombuf(cls, buf, encoding, errors):
1558 """Construct a TarInfo object from a 512 byte bytes object.
1561 raise EmptyHeaderError("empty header")
1562 if len(buf) != BLOCKSIZE:
1563 raise TruncatedHeaderError("truncated header")
1564 if buf.count(NUL) == BLOCKSIZE:
1565 raise EOFHeaderError("end of file header")
1567 chksum = nti(buf[148:156])
1568 if chksum not in calc_chksums(buf):
1569 raise InvalidHeaderError("bad checksum")
1572 obj.name = nts(buf[0:100], encoding, errors)
1573 obj.mode = nti(buf[100:108])
1574 obj.uid = nti(buf[108:116])
1575 obj.gid = nti(buf[116:124])
1576 obj.size = nti(buf[124:136])
1577 obj.mtime = nti(buf[136:148])
1579 obj.type = buf[156:157]
1580 obj.linkname = nts(buf[157:257], encoding, errors)
1581 obj.uname = nts(buf[265:297], encoding, errors)
1582 obj.gname = nts(buf[297:329], encoding, errors)
1583 obj.devmajor = nti(buf[329:337])
1584 obj.devminor = nti(buf[337:345])
1585 prefix = nts(buf[345:500], encoding, errors)
1587 # The old GNU sparse format occupies some of the unused
1588 # space in the buffer for up to 4 sparse structures.
1589 # Save the them for later processing in _proc_sparse().
1590 if obj.type == GNUTYPE_SPARSE:
1595 offset = nti(buf[pos:pos + 12])
1596 numbytes = nti(buf[pos + 12:pos + 24])
1599 structs.append((offset, numbytes))
1601 isextended = bool(buf[482])
1602 origsize = nti(buf[483:495])
1603 obj._sparse_structs = (structs, isextended, origsize)
1605 # Old V7 tar format represents a directory as a regular
1606 # file with a trailing slash.
1607 if obj.type == AREGTYPE and obj.name.endswith("/"):
1610 # Remove redundant slashes from directories.
1612 obj.name = obj.name.rstrip("/")
1614 # Reconstruct a ustar longname.
1615 if prefix and obj.type not in GNU_TYPES:
1616 obj.name = prefix + "/" + obj.name
1618 obj.offset_data = nti(buf[369:381])
1622 def fromtarfile(cls, tarfile):
1623 """Return the next TarInfo object from TarFile object
1626 buf = tarfile.fileobj.read(BLOCKSIZE)
1627 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1628 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1629 return obj._proc_member(tarfile)
    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    #
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        # Any other type is handled like a regular/builtin member.
        return self._proc_builtin(tarfile)
1655 def _proc_builtin(self, tarfile):
1656 """Process a builtin type or an unknown type which
1657 will be treated as a regular file.
1659 self.offset_data = tarfile.fileobj.tell()
1660 offset = self.offset_data
1661 if self.isreg() or self.ismultivol() or self.type not in SUPPORTED_TYPES:
1662 # Skip the following data blocks.
1663 offset += self._block(self.size)
1664 tarfile.offset = offset
1666 # Patch the TarInfo object with saved global
1667 # header information.
1668 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1672 def _proc_gnulong(self, tarfile):
1673 """Process the blocks that hold a GNU longname
1676 buf = tarfile.fileobj.read(self._block(self.size))
1678 # Fetch the next header and process it.
1680 next = self.fromtarfile(tarfile)
1682 raise SubsequentHeaderError("missing or bad subsequent header")
1684 # Patch the TarInfo object from the next header with
1685 # the longname information.
1686 next.offset = self.offset
1687 if self.type == GNUTYPE_LONGNAME:
1688 next.name = nts(buf, tarfile.encoding, tarfile.errors)
1689 elif self.type == GNUTYPE_LONGLINK:
1690 next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
1694 def _proc_sparse(self, tarfile):
1695 """Process a GNU sparse header plus extra headers.
1697 # We already collected some sparse structures in frombuf().
1698 structs, isextended, origsize = self._sparse_structs
1699 del self._sparse_structs
1701 # Collect sparse structures from extended header blocks.
1703 buf = tarfile.fileobj.read(BLOCKSIZE)
1707 offset = nti(buf[pos:pos + 12])
1708 numbytes = nti(buf[pos + 12:pos + 24])
1711 if offset and numbytes:
1712 structs.append((offset, numbytes))
1714 isextended = bool(buf[504])
1715 self.sparse = structs
1717 self.offset_data = tarfile.fileobj.tell()
1718 tarfile.offset = self.offset_data + self._block(self.size)
1719 self.size = origsize
1722 def _proc_pax(self, tarfile):
1723 """Process an extended or global header as described in
1726 # Read the header information.
1727 buf = tarfile.fileobj.read(self._block(self.size))
1729 # A pax header stores supplemental information for either
1730 # the following file (extended) or all following files
1732 if self.type == XGLTYPE:
1733 pax_headers = tarfile.pax_headers
1735 pax_headers = tarfile.pax_headers.copy()
1737 # Check if the pax header contains a hdrcharset field. This tells us
1738 # the encoding of the path, linkpath, uname and gname fields. Normally,
1739 # these fields are UTF-8 encoded but since POSIX.1-2008 tar
1740 # implementations are allowed to store them as raw binary strings if
1741 # the translation to UTF-8 fails.
1742 match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
1743 if match is not None:
1744 pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
1746 # For the time being, we don't care about anything other than "BINARY".
1747 # The only other value that is currently allowed by the standard is
1748 # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
1749 hdrcharset = pax_headers.get("hdrcharset")
1750 if hdrcharset == "BINARY":
1751 encoding = tarfile.encoding
1755 # Parse pax header information. A record looks like that:
1756 # "%d %s=%s\n" % (length, keyword, value). length is the size
1757 # of the complete record including the length field itself and
1758 # the newline. keyword and value are both UTF-8 encoded strings.
1759 regex = re.compile(br"(\d+) ([^=]+)=")
1762 match = regex.match(buf, pos)
1766 length, keyword = match.groups()
1767 length = int(length)
1768 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1770 # Normally, we could just use "utf-8" as the encoding and "strict"
1771 # as the error handler, but we better not take the risk. For
1772 # example, GNU tar <= 1.23 is known to store filenames it cannot
1773 # translate to UTF-8 as raw strings (unfortunately without a
1774 # hdrcharset=BINARY header).
1775 # We first try the strict standard encoding, and if that fails we
1776 # fall back on the user's encoding and error handler.
1777 keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
1779 if keyword in PAX_NAME_FIELDS:
1780 value = self._decode_pax_field(value, encoding, tarfile.encoding,
1783 value = self._decode_pax_field(value, "utf-8", "utf-8",
1786 pax_headers[keyword] = value
1790 # Fetch the next header.
1792 next = self.fromtarfile(tarfile)
1794 raise SubsequentHeaderError("missing or bad subsequent header")
1796 # Process GNU sparse information.
1797 if "GNU.sparse.map" in pax_headers:
1798 # GNU extended sparse format version 0.1.
1799 self._proc_gnusparse_01(next, pax_headers)
1801 elif "GNU.sparse.size" in pax_headers:
1802 # GNU extended sparse format version 0.0.
1803 self._proc_gnusparse_00(next, pax_headers, buf)
1805 elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1806 # GNU extended sparse format version 1.0.
1807 self._proc_gnusparse_10(next, pax_headers, tarfile)
1809 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1810 # Patch the TarInfo object with the extended header info.
1811 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1812 next.offset = self.offset
1814 if "size" in pax_headers:
1815 # If the extended header replaces the size field,
1816 # we need to recalculate the offset where the next
1818 offset = next.offset_data
1819 if next.isreg() or next.type not in SUPPORTED_TYPES:
1820 offset += next._block(next.size)
1821 tarfile.offset = offset
1823 if next is not None:
1824 if "GNU.volume.filename" in pax_headers:
1825 if pax_headers["GNU.volume.filename"] == next.name:
1826 if "GNU.volume.size" in pax_headers:
1827 next.size = int(pax_headers["GNU.volume.size"])
1828 if "GNU.volume.offset" in pax_headers:
1829 next.volume_offset = int(pax_headers["GNU.volume.offset"])
1831 for key in pax_headers.keys():
1832 if key.startswith("GNU.volume"):
1833 del tarfile.pax_headers[key]
1837 def _proc_gnusparse_00(self, next, pax_headers, buf):
1838 """Process a GNU tar extended sparse header, version 0.0.
1841 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1842 offsets.append(int(match.group(1)))
1844 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1845 numbytes.append(int(match.group(1)))
1846 next.sparse = list(zip(offsets, numbytes))
    def _proc_gnusparse_01(self, next, pax_headers):
        """Process a GNU tar extended sparse header, version 0.1.
        """
        # The map is a flat comma-separated list of alternating
        # offset/numbytes values; pair them up.
        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1854 def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1855 """Process a GNU tar extended sparse header, version 1.0.
1859 buf = tarfile.fileobj.read(BLOCKSIZE)
1860 fields, buf = buf.split(b"\n", 1)
1861 fields = int(fields)
1862 while len(sparse) < fields * 2:
1863 if b"\n" not in buf:
1864 buf += tarfile.fileobj.read(BLOCKSIZE)
1865 number, buf = buf.split(b"\n", 1)
1866 sparse.append(int(number))
1867 next.offset_data = tarfile.fileobj.tell()
1868 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1870 def _apply_pax_info(self, pax_headers, encoding, errors):
1871 """Replace fields with supplemental information from a previous
1872 pax extended or global header.
1874 for keyword, value in pax_headers.items():
1875 if keyword == "GNU.sparse.name":
1876 setattr(self, "path", value)
1877 elif keyword == "GNU.sparse.size":
1878 setattr(self, "size", int(value))
1879 elif keyword == "GNU.sparse.realsize":
1880 setattr(self, "size", int(value))
1881 elif keyword in PAX_FIELDS:
1882 if keyword in PAX_NUMBER_FIELDS:
1884 value = PAX_NUMBER_FIELDS[keyword](value)
1887 if keyword == "path":
1888 value = value.rstrip("/") # pylint: disable=no-member
1889 setattr(self, keyword, value)
1891 self.pax_headers = pax_headers.copy()
1893 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1894 """Decode a single field from a pax record.
1897 return value.decode(encoding, "strict")
1898 except UnicodeDecodeError:
1899 return value.decode(fallback_encoding, fallback_errors)
1901 def _block(self, count):
1902 """Round up a byte count by BLOCKSIZE and return it,
1903 e.g. _block(834) => 1024.
1905 blocks, remainder = divmod(count, BLOCKSIZE)
1908 return blocks * BLOCKSIZE
1911 return self.type in REGULAR_TYPES
1915 return self.type == DIRTYPE
1917 return self.type == SYMTYPE
1919 return self.type == LNKTYPE
1921 return self.type == CHRTYPE
1923 return self.type == BLKTYPE
1925 return self.type == FIFOTYPE
1927 return self.sparse is not None
1929 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1930 def ismultivol(self):
1931 return self.type == GNUTYPE_MULTIVOL or self.volume_offset > 0 or\
1932 "GNU.volume.offset" in self.pax_headers
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    max_volume_size = None      # If different from None, establishes maximum
                                # size of tar volumes

    new_volume_handler = None   # function handler to be executed before when
                                # a new volume is needed

    volume_number = 0           # current volume number, used for multi volume
                                # support

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The file-object for extractfile().

    arcmode = ARCMODE_PLAIN     # Object processing mode (“concat”, encryption,
                                # compression)

    save_to_members = True      # If new members are saved. This can be disabled
                                # if you manage lots of files and don't want
                                # to have high memory usage

    cache_uid2user = {}         # cache to avoid getpwuid calls. It always parses /etc/passwd.
    cache_gid2group = {}        # same cache for groups
1980 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1981 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1982 errors="surrogateescape", pax_headers=None, debug=None,
1983 errorlevel=None, max_volume_size=None, new_volume_handler=None,
1984 concat=False, nacl=None,
1985 save_to_members=True):
1986 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1987 read from an existing archive, 'a' to append data to an existing
1988 file or 'w' to create a new file overwriting an existing one. `mode'
1990 If `fileobj' is given, it is used for reading or writing data. If it
1991 can be determined, `mode' is overridden by `fileobj's mode.
1992 `fileobj' is not closed, when TarFile is closed.
1994 if len(mode) > 1 or mode not in "raw":
1995 raise ValueError("mode must be 'r', 'a' or 'w'")
1997 self.arcmode = arcmode_set (concat)
1999 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
2002 if self.mode == "a" and not os.path.exists(name):
2003 # Create nonexistent files in append mode.
2006 fileobj = bltn_open(name, self._mode)
2007 self._extfileobj = False
2009 if name is None and hasattr(fileobj, "name"):
2011 # when fileobj is a gzip.GzipFile, fileobj.mode is an int (not valid for us)
2012 if hasattr(fileobj, "mode") and isinstance(fileobj.mode, str):
2013 self._mode = fileobj.mode
2014 self._extfileobj = True
2015 self.name = os.path.abspath(name) if name else None
2016 self.base_name = self.name = os.path.abspath(name) if name else None
2017 self.fileobj = fileobj
2020 if format is not None:
2021 self.format = format
2022 if tarinfo is not None:
2023 self.tarinfo = tarinfo
2024 if dereference is not None:
2025 self.dereference = dereference
2026 if ignore_zeros is not None:
2027 self.ignore_zeros = ignore_zeros
2028 if encoding is not None:
2029 self.encoding = encoding
2031 self.errors = errors
2033 if pax_headers is not None and self.format == PAX_FORMAT:
2034 self.pax_headers = pax_headers
2036 self.pax_headers = {}
2038 if debug is not None:
2040 if errorlevel is not None:
2041 self.errorlevel = errorlevel
2043 # Init datastructures.
2044 if max_volume_size and max_volume_size < 3*BLOCKSIZE:
2045 raise ValueError("max_volume_size needs to be at least %d" % (3*BLOCKSIZE))
2046 if max_volume_size and not callable(new_volume_handler):
2047 raise ValueError("new_volume_handler needs to be set and be callable for multivolume support")
2049 self.max_volume_size = int(max_volume_size)
2051 self.max_volume_size = None
2053 self.save_to_members = save_to_members
2054 self.new_volume_handler = new_volume_handler
2056 self.members = [] # list of members as TarInfo objects
2057 self._loaded = False # flag if all members have been read
2058 self.offset = self.fileobj.tell()
2059 # current position in the archive file
2060 self.inodes = {} # dictionary caching the inodes of
2061 # archive members already added
2064 if self.mode == "r":
2065 self.firstmember = None
2066 self.firstmember = self.next()
2068 if self.mode == "a":
2069 # Move to the end of the archive,
2070 # before the first empty block.
2072 self.fileobj.seek(self.offset)
2074 tarinfo = self.tarinfo.fromtarfile(self)
2075 self.members.append(tarinfo)
2076 except EOFHeaderError:
2077 self.fileobj.seek(self.offset)
2079 except HeaderError as e:
2080 raise ReadError(str(e))
2082 if self.mode in "aw":
2085 if self.pax_headers:
2086 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
2087 self.fileobj.write(buf)
2088 self.offset += len(buf)
2090 if not self._extfileobj:
2091 self.fileobj.close()
2095 #--------------------------------------------------------------------------
2096 # Below are the classmethods which act as alternate constructors to the
2097 # TarFile class. The open() method is the only one that is needed for
2098 # public use; it is the "super"-constructor and is able to select an
2099 # adequate "sub"-constructor for a particular compression using the mapping
2102 # This concept allows one to subclass TarFile without losing the comfort of
2103 # the super-constructor. A sub-constructor is registered and made available
2104 # by adding it to the mapping in OPEN_METH.
2107 def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE,
2108 encryption=None, compresslevel=9, tolerance=TOLERANCE_STRICT,
2110 """Open a tar archive for reading, writing or appending. Return
2111 an appropriate TarFile class.
2114 'r' or 'r:*' open for reading with transparent compression
2115 'r:' open for reading exclusively uncompressed
2116 'r:gz' open for reading with gzip compression
2117 'r:bz2' open for reading with bzip2 compression
2118 'r:xz' open for reading with lzma compression
2119 'a' or 'a:' open for appending, creating the file if necessary
2120 'w' or 'w:' open for writing without compression
2121 'w:gz' open for writing with gzip compression
2122 'w:bz2' open for writing with bzip2 compression
2123 'w:xz' open for writing with lzma compression
2125 'r|*' open a stream of tar blocks with transparent compression
2126 'r|' open an uncompressed stream of tar blocks for reading
2127 'r|gz' open a gzip compressed stream of tar blocks
2128 'r|bz2' open a bzip2 compressed stream of tar blocks
2129 'r|xz' open an lzma compressed stream of tar blocks
2130 'w|' open an uncompressed stream for writing
2131 'w|gz' open a gzip compressed stream for writing
2132 'w|bz2' open a bzip2 compressed stream for writing
2133 'w|xz' open an lzma compressed stream for writing
2135 'r#gz' open a stream of gzip compressed tar blocks for reading
2136 'w#gz' open a stream of gzip compressed tar blocks for writing
2138 if not name and not fileobj:
2139 raise ValueError("nothing to open")
2141 if mode in ("r", "r:*"):
2142 # Find out which *open() is appropriate for opening the file.
2143 for comptype in cls.OPEN_METH:
2144 func = getattr(cls, cls.OPEN_METH[comptype])
2145 if fileobj is not None:
2146 saved_pos = fileobj.tell()
2148 return func(name, "r", fileobj, **kwargs)
2149 except (ReadError, CompressionError) as e:
2150 # usually nothing exceptional but sometimes is
2151 if fileobj is not None:
2152 fileobj.seek(saved_pos)
2154 raise ReadError("file could not be opened successfully")
2157 filemode, comptype = mode.split(":", 1)
2158 filemode = filemode or "r"
2159 comptype = comptype or "tar"
2161 # Select the *open() function according to
2162 # given compression.
2163 if comptype in cls.OPEN_METH:
2164 func = getattr(cls, cls.OPEN_METH[comptype])
2166 raise CompressionError("unknown compression type %r" % comptype)
2168 # Pass on compression level for gzip / bzip2.
2169 if comptype == 'gz' or comptype == 'bz2':
2170 kwargs['compresslevel'] = compresslevel
2172 if 'max_volume_size' in kwargs:
2173 if comptype != 'tar' and filemode in 'wa' \
2174 and kwargs['max_volume_size']:
2176 warnings.warn('Only the first volume will be compressed '
2177 'for modes with "w:"!')
2179 return func(name, filemode, fileobj, **kwargs)
2182 filemode, comptype = mode.split("|", 1)
2183 filemode = filemode or "r"
2184 comptype = comptype or "tar"
2186 if filemode not in "rw":
2187 raise ValueError("mode must be 'r' or 'w'")
2189 t = cls(name, filemode,
2190 _Stream(name, filemode, comptype, fileobj, bufsize,
2191 compresslevel=compresslevel),
2193 t._extfileobj = False
2197 filemode, comptype = mode.split("#", 1)
2198 filemode = filemode or "r"
2200 if filemode not in "rw":
2201 raise ValueError ("mode %s not compatible with concat "
2202 "archive; must be 'r' or 'w'" % mode)
2204 stream = _Stream(name, filemode, comptype, fileobj, bufsize,
2205 concat=True, encryption=encryption,
2206 compresslevel=compresslevel, tolerance=tolerance)
2207 kwargs ["concat"] = True
2209 t = cls(name, filemode, stream, **kwargs)
2210 except: # XXX except what?
2212 raise # XXX raise what?
2213 t._extfileobj = False
2217 return cls.taropen(name, mode, fileobj, **kwargs)
2219 raise ValueError("undiscernible mode %r" % mode)
2223 def open_at_offset(cls, offset, *a, **kwa):
2225 Same as ``.open()``, but start reading at the given offset. Assumes a
2226 seekable file object.
2228 fileobj = kwa.get ("fileobj")
2229 if fileobj is not None:
2230 fileobj.seek (offset)
2231 return cls.open (*a, **kwa)
2235 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
2236 """Open uncompressed tar archive name for reading or writing.
2238 if len(mode) > 1 or mode not in "raw":
2239 raise ValueError("mode must be 'r', 'a' or 'w'")
2240 return cls(name, mode, fileobj, **kwargs)
2243 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
2244 """Open gzip compressed tar archive name for reading or writing.
2245 Appending is not allowed.
2247 if len(mode) > 1 or mode not in "rw":
2248 raise ValueError("mode must be 'r' or 'w'")
2253 except (ImportError, AttributeError):
2254 raise CompressionError("gzip module is not available")
2256 extfileobj = fileobj is not None
2258 fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
2259 t = cls.taropen(name, mode, fileobj, **kwargs)
2261 if not extfileobj and fileobj is not None:
2265 raise ReadError("not a gzip file")
2267 if not extfileobj and fileobj is not None:
2270 t._extfileobj = extfileobj
2274 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
2275 """Open bzip2 compressed tar archive name for reading or writing.
2276 Appending is not allowed.
2278 if len(mode) > 1 or mode not in "rw":
2279 raise ValueError("mode must be 'r' or 'w'.")
2284 raise CompressionError("bz2 module is not available")
2286 fileobj = bz2.BZ2File(fileobj or name, mode,
2287 compresslevel=compresslevel)
2290 t = cls.taropen(name, mode, fileobj, **kwargs)
2291 except (OSError, EOFError):
2293 raise ReadError("not a bzip2 file")
2294 t._extfileobj = False
2298 def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
2299 """Open lzma compressed tar archive name for reading or writing.
2300 Appending is not allowed.
2302 if mode not in ("r", "w"):
2303 raise ValueError("mode must be 'r' or 'w'")
2308 raise CompressionError("lzma module is not available")
2310 fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)
2313 t = cls.taropen(name, mode, fileobj, **kwargs)
2314 except (lzma.LZMAError, EOFError):
2316 raise ReadError("not an lzma file")
2317 t._extfileobj = False
2320 # All *open() methods are registered here.
2322 "tar": "taropen", # uncompressed tar
2323 "gz": "gzopen", # gzip compressed tar
2324 "bz2": "bz2open", # bzip2 compressed tar
2325 "xz": "xzopen" # lzma compressed tar
2328 #--------------------------------------------------------------------------
2329 # The public methods which TarFile provides:
2332 """Close the TarFile. In write-mode, two finishing zero blocks are
2333 appended to the archive. A special case are empty archives which are
2334 initialized accordingly so the two mandatory blocks of zeros are
2335 written abiding by the requested encryption and compression settings.
2340 if self.mode in "aw":
2341 if self.arcmode & ARCMODE_CONCAT and self.fileobj.tell () == 0:
2342 self.fileobj.next ("")
2343 self.fileobj.write(NUL * (BLOCKSIZE * 2))
2344 self.offset += (BLOCKSIZE * 2)
2345 # fill up the end with zero-blocks
2346 # (like option -b20 for tar does)
2347 blocks, remainder = divmod(self.offset, RECORDSIZE)
2349 self.fileobj.write(NUL * (RECORDSIZE - remainder))
2350 if not self._extfileobj:
2351 self.fileobj.close()
2354 def getmember(self, name):
2355 """Return a TarInfo object for member `name'. If `name' can not be
2356 found in the archive, KeyError is raised. If a member occurs more
2357 than once in the archive, its last occurrence is assumed to be the
2358 most up-to-date version.
2360 tarinfo = self._getmember(name)
2362 raise KeyError("filename %r not found" % name)
2365 def getmembers(self):
2366 """Return the members of the archive as a list of TarInfo objects. The
2367 list has the same order as the members in the archive.
2370 if not self._loaded: # if we want to obtain a list of
2371 self._load() # all members, we first have to
2372 # scan the whole archive.
2375 def get_last_member_offset(self):
2376 """Return the last member offset. Usually this is self.fileobj.tell(),
2377 but when there's encryption or concat compression going on it's more
2378 complicated than that.
2380 return self.last_block_offset
2383 """Return the members of the archive as a list of their names. It has
2384 the same order as the list returned by getmembers().
2386 return [tarinfo.name for tarinfo in self.getmembers()]
2388 def gettarinfo(self, name=None, arcname=None, fileobj=None):
2389 """Create a TarInfo object for either the file `name' or the file
2390 object `fileobj' (using os.fstat on its file descriptor). You can
2391 modify some of the TarInfo's attributes before you add it using
2392 addfile(). If given, `arcname' specifies an alternative name for the
2393 file in the archive.
2397 # When fileobj is given, replace name by
2398 # fileobj's real name.
2399 if fileobj is not None:
2402 # Building the name of the member in the archive.
2403 # Backward slashes are converted to forward slashes,
2404 # Absolute paths are turned to relative paths.
2407 drv, arcname = os.path.splitdrive(arcname)
2408 arcname = arcname.replace(os.sep, "/")
2409 arcname = arcname.lstrip("/")
2411 # Now, fill the TarInfo object with
2412 # information specific for the file.
2413 tarinfo = self.tarinfo()
2414 tarinfo.tarfile = self
2416 # Use os.stat or os.lstat, depending on platform
2417 # and if symlinks shall be resolved.
2419 if hasattr(os, "lstat") and not self.dereference:
2420 statres = os.lstat(name)
2422 statres = os.stat(name)
2424 statres = os.fstat(fileobj.fileno())
2427 stmd = statres.st_mode
2428 if stat.S_ISREG(stmd):
2429 inode = (statres.st_ino, statres.st_dev)
2430 if not self.dereference and statres.st_nlink > 1 and \
2431 inode in self.inodes and arcname != self.inodes[inode]:
2432 # Is it a hardlink to an already
2435 linkname = self.inodes[inode]
2437 # The inode is added only if its valid.
2438 # For win32 it is always 0.
2440 if inode[0] and self.save_to_members:
2441 self.inodes[inode] = arcname
2442 elif stat.S_ISDIR(stmd):
2444 elif stat.S_ISFIFO(stmd):
2446 elif stat.S_ISLNK(stmd):
2448 linkname = os.readlink(name)
2449 elif stat.S_ISCHR(stmd):
2451 elif stat.S_ISBLK(stmd):
2456 # Fill the TarInfo object with all
2457 # information we can get.
2458 tarinfo.name = arcname
2460 tarinfo.uid = statres.st_uid
2461 tarinfo.gid = statres.st_gid
2463 tarinfo.size = statres.st_size
2466 tarinfo.mtime = statres.st_mtime
2468 tarinfo.linkname = linkname
2470 if tarinfo.uid in self.cache_uid2user:
2471 tarinfo.uname = self.cache_uid2user[tarinfo.uid]
2474 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
2475 self.cache_uid2user[tarinfo.uid] = tarinfo.uname
2477 # remember user does not exist:
2478 # same default value as in tarinfo class
2479 self.cache_uid2user[tarinfo.uid] = ""
2481 if tarinfo.gid in self.cache_gid2group:
2482 tarinfo.gname = self.cache_gid2group[tarinfo.gid]
2485 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
2486 self.cache_gid2group[tarinfo.gid] = tarinfo.gname
2488 # remember group does not exist:
2489 # same default value as in tarinfo class
2490 self.cache_gid2group[tarinfo.gid] = ""
2492 if type in (CHRTYPE, BLKTYPE):
2493 if hasattr(os, "major") and hasattr(os, "minor"):
2494 tarinfo.devmajor = os.major(statres.st_rdev)
2495 tarinfo.devminor = os.minor(statres.st_rdev)
2498 def list(self, verbose=True):
2499 """Print a table of contents to sys.stdout. If `verbose' is False, only
2500 the names of the members are printed. If it is True, an `ls -l'-like
2505 for tarinfo in self:
2507 print(stat.filemode(tarinfo.mode), end=' ')
2508 print("%s/%s" % (tarinfo.uname or tarinfo.uid,
2509 tarinfo.gname or tarinfo.gid), end=' ')
2510 if tarinfo.ischr() or tarinfo.isblk():
2511 print("%10s" % ("%d,%d" \
2512 % (tarinfo.devmajor, tarinfo.devminor)), end=' ')
2514 print("%10d" % tarinfo.size, end=' ')
2515 print("%d-%02d-%02d %02d:%02d:%02d" \
2516 % time.localtime(tarinfo.mtime)[:6], end=' ')
2518 print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ')
2522 print("->", tarinfo.linkname, end=' ')
2524 print("link to", tarinfo.linkname, end=' ')
2527 def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
2528 """Add the file `name' to the archive. `name' may be any type of file
2529 (directory, fifo, symbolic link, etc.). If given, `arcname'
2530 specifies an alternative name for the file in the archive.
2531 Directories are added recursively by default. This can be avoided by
2532 setting `recursive' to False. `exclude' is a function that should
2533 return True for each filename to be excluded. `filter' is a function
2534 that expects a TarInfo object argument and returns the changed
2535 TarInfo object, if it returns None the TarInfo object will be
2536 excluded from the archive.
2543 # Exclude pathnames.
2544 if exclude is not None:
2546 warnings.warn("use the filter argument instead",
2547 DeprecationWarning, 2)
2549 self._dbg(2, "tarfile: Excluded %r" % name)
2552 # Skip if somebody tries to archive the archive...
2553 if self.name is not None and os.path.abspath(name) == self.name:
2554 self._dbg(2, "tarfile: Skipped %r" % name)
2559 # Create a TarInfo object from the file.
2560 tarinfo = self.gettarinfo(name, arcname)
2563 self._dbg(1, "tarfile: Unsupported type %r" % name)
2566 # Change or exclude the TarInfo object.
2567 if filter is not None:
2568 tarinfo = filter(tarinfo)
2570 self._dbg(2, "tarfile: Excluded %r" % name)
2573 # Append the tar header and data to the archive.
2575 with bltn_open(name, "rb") as f:
2576 self.addfile(tarinfo, f)
2578 elif tarinfo.isdir():
2579 self.addfile(tarinfo)
2581 for f in os.listdir(name):
2582 self.add(os.path.join(name, f), os.path.join(arcname, f),
2583 recursive, exclude, filter=filter)
2586 self.addfile(tarinfo)
2588 def _size_left_file(self):
2589 """Calculates size left in a volume with a maximum volume size.
2591 Assumes self.max_volume_size is set.
2592 If using compression through a _Stream, use _size_left_stream instead
2594 # left-over size = max_size - offset - 2 zero-blocks written in close
2595 size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
2596 # limit size left to a discrete number of blocks, because we won't
2597 # write only half a block when writting the end of a volume
2598 # and filling with zeros
2599 return BLOCKSIZE * (size_left // BLOCKSIZE)
2601 def _size_left_stream(self):
2602 """ Calculates size left in a volume if using comression/encryption
2604 Assumes self.max_volume_size is set and self.fileobj is a _Stream
2605 (otherwise use _size_left_file)
2607 # left-over size = max_size - bytes written - 2 zero-blocks (close)
2608 size_left = self.max_volume_size - self.fileobj.estim_file_size() \
2610 return BLOCKSIZE * (size_left // BLOCKSIZE)
2612 def addfile(self, tarinfo, fileobj=None):
2613 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2614 given, tarinfo.size bytes are read from it and added to the archive.
2615 You can create TarInfo objects using gettarinfo().
2616 On Windows platforms, `fileobj' should always be opened with mode
2617 'rb' to avoid irritation about the file size.
2621 tarinfo = copy.copy(tarinfo)
2623 if self.arcmode & ARCMODE_CONCAT:
2624 self.last_block_offset = self.fileobj.next (tarinfo.name)
2626 self.last_block_offset = self.fileobj.tell()
2628 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2629 self.fileobj.write(buf)
2630 self.offset += len(buf)
2632 if self.max_volume_size:
2633 if isinstance(self.fileobj, _Stream):
2634 _size_left = self._size_left_stream
2636 _size_left = self._size_left_file
2638 _size_left = lambda: tarinfo.size
2640 # If there's no data to follow, finish
2642 if self.save_to_members:
2643 self.members.append(tarinfo)
2646 target_size_left = _size_left()
2647 source_size_left = tarinfo.size
2648 assert tarinfo.volume_offset == 0
2650 # we only split volumes in the middle of a file, that means we have
2651 # to write at least one block
2652 if target_size_left < BLOCKSIZE:
2653 target_size_left = BLOCKSIZE
2655 # loop over multiple volumes
2656 while source_size_left > 0:
2658 # Write as much data as possble from source into target.
2659 # When compressing data, we cannot easily predict how much data we
2660 # can write until target_size_left == 0 --> need to iterate
2661 size_can_write = min(target_size_left, source_size_left)
2663 while size_can_write > 0:
2664 copyfileobj(fileobj, self.fileobj, size_can_write)
2665 self.offset += size_can_write
2666 source_size_left -= size_can_write
2667 target_size_left = _size_left()
2668 size_can_write = min(target_size_left, source_size_left)
2670 # now target_size_left == 0 or source_size_left == 0
2672 # if there is data left to write, we need to create a new volume
2673 if source_size_left > 0:
2674 # Only finalize the crypto entry here if we’re continuing with
2675 # another one; otherwise, the encryption must include the block
2677 tarinfo.type = GNUTYPE_MULTIVOL
2679 if not self.new_volume_handler or\
2680 not callable(self.new_volume_handler):
2681 raise Exception("We need to create a new volume and you "
2682 "didn't supply a new_volume_handler")
2685 # the new volume handler should do everything needed to
2686 # start working in a new volume. usually, the handler calls
2687 # to self.open_volume
2688 self.volume_number += 1
2690 # set to be used by open_volume, because in the case of a PAX
2691 # tar it needs to write information about the volume and offset
2692 # in the global header
2693 tarinfo.volume_offset = tarinfo.size - source_size_left
2694 self.volume_tarinfo = tarinfo
2696 # the “new_volume_handler” is supposed to call .close() on the
2698 self.new_volume_handler(self, self.base_name, self.volume_number)
2700 self.volume_tarinfo = None
2702 if self.arcmode & ARCMODE_CONCAT:
2703 self.fileobj.next_volume (tarinfo.name)
2705 # write new volume header
2706 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2707 self.fileobj.write(buf)
2708 self.offset += len(buf)
2710 # adjust variables; open_volume should have reset self.offset
2711 # --> _size_left should be big again
2712 target_size_left = _size_left()
2713 size_can_write = min(target_size_left, source_size_left)
2714 self._dbg(3, 'new volume')
2716 # now, all data has been written. We may have to fill up the rest of
2717 # the block in target with 0s
2718 remainder = (tarinfo.size - tarinfo.volume_offset) % BLOCKSIZE
2720 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2721 self.offset += BLOCKSIZE - remainder
2723 if self.save_to_members:
2724 self.members.append(tarinfo)
2726 def open_volume(self, name="", fileobj=None, encryption=None):
2728 Called by the user to change this tar file to point to a new volume.
2730 # open the file using either fileobj or name
2732 if self.mode == "a" and not os.path.exists(name):
2733 # Create nonexistent files in append mode.
2736 self._extfileobj = False
2738 if isinstance(self.fileobj, _Stream):
2739 self._dbg(3, 'open_volume: create a _Stream')
2740 fileobj = _Stream(name=name,
2741 mode=self.fileobj.mode,
2742 comptype=self.fileobj.comptype,
2744 bufsize=self.fileobj.bufsize,
2745 encryption=encryption or self.fileobj.encryption,
2746 concat=self.fileobj.arcmode & ARCMODE_CONCAT)
2748 # here, we lose information about compression/encryption!
2749 self._dbg(3, 'open_volume: builtin open')
2750 fileobj = bltn_open(name, self._mode)
2752 if name is None and hasattr(fileobj, "name"):
2754 if hasattr(fileobj, "mode"):
2755 self._mode = fileobj.mode
2756 self._extfileobj = True
2757 self._dbg(3, 'open_volume: using external fileobj {}', fileobj)
2758 self.name = os.path.abspath(name) if name else None
2759 self.fileobj = fileobj
2761 # init data structures
2763 self.members = [] # list of members as TarInfo objects
2764 self._loaded = False # flag if all members have been read
2765 self.offset = self.fileobj.tell()
2766 # current position in the archive file
2767 self.inodes = {} # dictionary caching the inodes of
2768 # archive members already added
2771 if self.mode == "r":
2772 self.firstmember = None
2773 self.firstmember = self.next()
2775 if self.mode == "a":
2776 # Move to the end of the archive,
2777 # before the first empty block.
2779 self.fileobj.seek(self.offset)
2781 tarinfo = self.tarinfo.fromtarfile(self)
2782 self.members.append(tarinfo)
2783 except EOFHeaderError:
2784 self.fileobj.seek(self.offset)
2786 except HeaderError as e:
2787 raise ReadError(str(e))
2789 if self.mode in "aw":
2792 if self.format == PAX_FORMAT:
2794 "GNU.volume.filename": str(self.volume_tarinfo.name),
2795 "GNU.volume.size": str(self.volume_tarinfo.size - self.volume_tarinfo.volume_offset),
2796 "GNU.volume.offset": str(self.volume_tarinfo.volume_offset),
2799 self.pax_headers.update(volume_info)
2801 if isinstance(self.fileobj, _Stream):
2802 self.fileobj._init_write_gz ()
2803 buf = self.tarinfo.create_pax_global_header(volume_info.copy())
2804 self.fileobj.write(buf)
2805 self.offset += len(buf)
2806 except Exception as exn:
2807 if not self._extfileobj:
2808 self.fileobj.close()
2812 def extractall(self, path=".", members=None, filter=None):
2813 """Extract all members from the archive to the current working
2814 directory and set owner, modification time and permissions on
2815 directories afterwards. `path' specifies a different directory
2816 to extract to. `members' is optional and must be a subset of the
2817 list returned by getmembers().
2824 for tarinfo in members:
2825 if self.volume_number > 0 and tarinfo.ismultivol():
2828 if filter and not filter(tarinfo):
2832 # Extract directories with a safe mode.
2833 directories.append(tarinfo)
2834 tarinfo = copy.copy(tarinfo)
2835 tarinfo.mode = 0o0700
2836 # Do not set_attrs directories, as we will do that further down
2837 self.extract(tarinfo, path, set_attrs=not tarinfo.isdir())
2839 # Reverse sort directories.
2840 directories.sort(key=lambda a: a.name)
2841 directories.reverse()
2843 # Set correct owner, mtime and filemode on directories.
2844 for tarinfo in directories:
2845 dirpath = os.path.join(path, tarinfo.name)
2847 self.chown(tarinfo, dirpath)
2848 self.utime(tarinfo, dirpath)
2849 self.chmod(tarinfo, dirpath)
2850 except ExtractError as e:
2851 if self.errorlevel > 1:
2854 self._dbg(1, "tarfile: %s" % e)
2856 def extract(self, member, path="", set_attrs=True, symlink_cb=None):
2857 """Extract a member from the archive to the current working directory,
2858 using its full name. Its file information is extracted as accurately
2859 as possible. `member' may be a filename or a TarInfo object. You can
2860 specify a different directory using `path'. File attributes (owner,
2861 mtime, mode) are set unless `set_attrs' is False.
2862 ``symlink_cb`` is a hook accepting a function that is passed the
2863 ``member``, ``path``, and ``set_attrs`` arguments if the tarinfo for
2864 ``member`` indicates a symlink in which case only the callback
2865 passed will be applied, skipping the actual extraction. In case the
2866 callback is invoked, its return value is passed on to the caller.
2870 if isinstance(member, str):
2871 tarinfo = self.getmember(member)
2875 # Prepare the link target for makelink().
2877 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2879 if symlink_cb is not None and tarinfo.issym():
2880 return symlink_cb(member, path, set_attrs)
2883 self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
2884 set_attrs=set_attrs)
2885 except EnvironmentError as e:
2886 if self.errorlevel > 0:
2889 if e.filename is None:
2890 self._dbg(1, "tarfile: %s" % e.strerror)
2892 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2893 except ExtractError as e:
2894 if self.errorlevel > 1:
2897 self._dbg(1, "tarfile: %s" % e)
2899 def extractfile(self, member):
2900 """Extract a member from the archive as a file object. `member' may be
2901 a filename or a TarInfo object. If `member' is a regular file or a
2902 link, an io.BufferedReader object is returned. Otherwise, None is
2907 if isinstance(member, str):
2908 tarinfo = self.getmember(member)
2912 if tarinfo.isreg() or tarinfo.ismultivol() or\
2913 tarinfo.type not in SUPPORTED_TYPES:
2914 # If a member's type is unknown, it is treated as a
2916 return self.fileobject(self, tarinfo)
2918 elif tarinfo.islnk() or tarinfo.issym():
2919 if isinstance(self.fileobj, _Stream):
2920 # A small but ugly workaround for the case that someone tries
2921 # to extract a (sym)link as a file-object from a non-seekable
2922 # stream of tar blocks.
2923 raise StreamError("cannot extract (sym)link as file object")
2925 # A (sym)link's file object is its target's file object.
2926 return self.extractfile(self._find_link_target(tarinfo))
2928 # If there's no data associated with the member (directory, chrdev,
2929 # blkdev, etc.), return None instead of a file object.
2932 def _extract_member(self, tarinfo, targetpath, set_attrs=True):
2933 """Extract the TarInfo object tarinfo to a physical
2934 file called targetpath.
2936 # Fetch the TarInfo object for the given name
2937 # and build the destination pathname, replacing
2938 # forward slashes to platform specific separators.
2939 targetpath = targetpath.rstrip("/")
2940 targetpath = targetpath.replace("/", os.sep)
2942 # Create all upper directories.
2943 upperdirs = os.path.dirname(targetpath)
2944 if upperdirs and not os.path.exists(upperdirs):
2945 # Create directories that are not part of the archive with
2946 # default permissions.
2947 os.makedirs(upperdirs)
2949 if tarinfo.islnk() or tarinfo.issym():
2950 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2952 self._dbg(1, tarinfo.name)
2955 self.makefile(tarinfo, targetpath)
2956 elif tarinfo.isdir():
2957 self.makedir(tarinfo, targetpath)
2958 elif tarinfo.isfifo():
2959 self.makefifo(tarinfo, targetpath)
2960 elif tarinfo.ischr() or tarinfo.isblk():
2961 self.makedev(tarinfo, targetpath)
2962 elif tarinfo.islnk() or tarinfo.issym():
2963 self.makelink(tarinfo, targetpath)
2964 elif tarinfo.type not in SUPPORTED_TYPES:
2965 self.makeunknown(tarinfo, targetpath)
2967 self.makefile(tarinfo, targetpath)
2970 self.chown(tarinfo, targetpath)
2971 if not tarinfo.issym():
2972 self.chmod(tarinfo, targetpath)
2973 self.utime(tarinfo, targetpath)
2975 #--------------------------------------------------------------------------
2976 # Below are the different file methods. They are called via
2977 # _extract_member() when extract() is called. They can be replaced in a
2978 # subclass to implement other functionality.
2980 def makedir(self, tarinfo, targetpath):
2981 """Make a directory called targetpath.
2984 # Use a safe mode for the directory, the real mode is set
2985 # later in _extract_member().
2986 os.mkdir(targetpath, 0o0700)
2987 except FileExistsError:
2990 def makefile(self, tarinfo, targetpath):
2991 """Make a file called targetpath.
2993 source = self.fileobj
2994 source.seek(tarinfo.offset_data)
2997 target = bltn_open(targetpath, "wb")
2999 if tarinfo.sparse is not None:
3001 for offset, size in tarinfo.sparse:
3003 copyfileobj(source, target, size)
3004 target.seek(tarinfo.size)
3013 copyfileobj(source, target, tarinfo.size)
3016 # only if we are extracting a multivolume this can be treated
3017 if not self.new_volume_handler:
3019 raise Exception("We need to read a new volume and you"
3020 " didn't supply a new_volume_handler")
3022 # the new volume handler should do everything needed to
3023 # start working in a new volume. usually, the handler calls
3024 # to self.open_volume
3025 self.volume_number += 1
3026 self.new_volume_handler(self, self.base_name, self.volume_number)
3027 tarinfo = self.firstmember
3028 source = self.fileobj
3033 def makeunknown(self, tarinfo, targetpath):
3034 """Make a file from a TarInfo object with an unknown type
3037 self.makefile(tarinfo, targetpath)
3038 self._dbg(1, "tarfile: Unknown file type %r, " \
3039 "extracted as regular file." % tarinfo.type)
3041 def makefifo(self, tarinfo, targetpath):
3042 """Make a fifo called targetpath.
3044 if hasattr(os, "mkfifo"):
3045 os.mkfifo(targetpath)
3047 raise ExtractError("fifo not supported by system")
3049 def makedev(self, tarinfo, targetpath):
3050 """Make a character or block device called targetpath.
3052 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
3053 raise ExtractError("special devices not supported by system")
3057 mode |= stat.S_IFBLK
3059 mode |= stat.S_IFCHR
3061 os.mknod(targetpath, mode,
3062 os.makedev(tarinfo.devmajor, tarinfo.devminor))
3064 def makelink(self, tarinfo, targetpath):
3065 """Make a (symbolic) link called targetpath. If it cannot be created
3066 (platform limitation), we try to make a copy of the referenced file
3070 # For systems that support symbolic and hard links.
3072 os.symlink(tarinfo.linkname, targetpath)
3075 if os.path.exists(tarinfo._link_target):
3076 os.link(tarinfo._link_target, targetpath)
3078 self._extract_member(self._find_link_target(tarinfo),
3080 except symlink_exception:
3082 self._extract_member(self._find_link_target(tarinfo),
3085 raise ExtractError("unable to resolve link inside archive")
3087 def chown(self, tarinfo, targetpath):
3088 """Set owner of targetpath according to tarinfo.
3090 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
3091 # We have to be root to do so.
3093 g = grp.getgrnam(tarinfo.gname)[2]
3097 u = pwd.getpwnam(tarinfo.uname)[2]
3101 if tarinfo.issym() and hasattr(os, "lchown"):
3102 os.lchown(targetpath, u, g)
3104 os.chown(targetpath, u, g)
3105 except OSError as e:
3106 raise ExtractError("could not change owner")
3108 def chmod(self, tarinfo, targetpath):
3109 """Set file permissions of targetpath according to tarinfo.
3111 if hasattr(os, 'chmod'):
3113 os.chmod(targetpath, tarinfo.mode)
3114 except OSError as e:
3115 raise ExtractError("could not change mode")
3117 def utime(self, tarinfo, targetpath):
3118 """Set modification time of targetpath according to tarinfo.
3120 if not hasattr(os, 'utime'):
3123 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
3124 except OSError as e:
3125 raise ExtractError("could not change modification time")
3127 #--------------------------------------------------------------------------
3129 """Return the next member of the archive as a TarInfo object, when
3130 TarFile is opened for reading. Return None if there is no more
3134 if self.firstmember is not None:
3135 m = self.firstmember
3136 self.firstmember = None
3139 # Read the next block.
3140 self.fileobj.seek(self.offset)
3144 tarinfo = self.tarinfo.fromtarfile(self)
3145 except EOFHeaderError as e:
3146 if self.ignore_zeros:
3147 self._dbg(2, "0x%X: %s" % (self.offset, e))
3148 self.offset += BLOCKSIZE
3150 except InvalidHeaderError as e:
3151 if self.ignore_zeros:
3152 self._dbg(2, "0x%X: %s" % (self.offset, e))
3153 self.offset += BLOCKSIZE
3155 elif self.offset == 0:
3156 raise ReadError(str(e))
3157 except EmptyHeaderError:
3158 if self.offset == 0:
3159 raise ReadError("empty file")
3160 except TruncatedHeaderError as e:
3161 if self.offset == 0:
3162 raise ReadError(str(e))
3163 except SubsequentHeaderError as e:
3164 raise ReadError(str(e))
3167 if tarinfo is not None:
3168 if self.save_to_members:
3169 self.members.append(tarinfo)
3175 #--------------------------------------------------------------------------
3176 # Little helper methods:
3178 def _getmember(self, name, tarinfo=None, normalize=False):
3179 """Find an archive member by name from bottom to top.
3180 If tarinfo is given, it is used as the starting point.
3182 # Ensure that all members have been loaded.
3183 members = self.getmembers()
3185 # Limit the member search list up to tarinfo.
3186 if tarinfo is not None:
3187 members = members[:members.index(tarinfo)]
3190 name = os.path.normpath(name)
3192 for member in reversed(members):
3194 member_name = os.path.normpath(member.name)
3196 member_name = member.name
3198 if name == member_name:
3202 """Read through the entire archive file and look for readable
3206 tarinfo = self.next()
3211 def _check(self, mode=None):
3212 """Check if TarFile is still open, and if the operation's mode
3213 corresponds to TarFile's mode.
3216 raise OSError("%s is closed" % self.__class__.__name__)
3217 if mode is not None and self.mode not in mode:
3218 raise OSError("bad operation for mode %r" % self.mode)
3220 def _find_link_target(self, tarinfo):
3221 """Find the target member of a symlink or hardlink member in the
3225 # Always search the entire archive.
3226 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
3229 # Search the archive before the link, because a hard link is
3230 # just a reference to an already archived file.
3231 linkname = tarinfo.linkname
3234 member = self._getmember(linkname, tarinfo=limit, normalize=True)
3236 raise KeyError("linkname %r not found" % linkname)
3240 """Provide an iterator object.
3243 return iter(self.members)
3245 return TarIter(self)
3247 def _dbg(self, level, msg, *args):
3248 """Write debugging output to sys.stderr.
3250 if level <= self.debug:
3251 print(msg.format(*args), file=sys.stderr)
3253 def __enter__(self):
3257 def __exit__(self, type, value, traceback):
3261 # An exception occurred. We must not call close() because
3262 # it would try to write end-of-archive blocks and padding.
3263 if not self._extfileobj:
3264 self.fileobj.close()
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def __next__(self):
        """Return the next item using TarFile's next() method.
           When all members have been read, set TarFile as _loaded.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.

        if self.index == 0 and self.tarfile.firstmember is not None:
            tarinfo = self.tarfile.next()
        elif self.index < len(self.tarfile.members):
            tarinfo = self.tarfile.members[self.index]
        elif not self.tarfile._loaded:
            tarinfo = self.tarfile.next()
            if not tarinfo:
                self.tarfile._loaded = True
                raise StopIteration
        else:
            raise StopIteration
        self.index += 1

        return tarinfo

    next = __next__  # Python 2 style alias kept for compatibility
3307 #---------------------------------------------------------
3308 # support functionality for rescue mode
3309 #---------------------------------------------------------
def read_tarobj_at_offset (fileobj, offset, mode, secret=None):
    """
    Read one tar member header at the given byte offset of ``fileobj`` and
    return the resulting *TarInfo* (or None-ish result of ``.next()``).

    ``secret`` is an optional pair ``(kind, value)`` where kind is one of the
    crypto.PDTCRYPT_SECRET_* constants selecting password- or key-based
    decryption.
    """
    decr = None
    if secret is not None:
        ks = secret [0]

        if ks == crypto.PDTCRYPT_SECRET_PW:
            decr = crypto.Decrypt (password=secret [1])
        elif ks == crypto.PDTCRYPT_SECRET_KEY:
            key = binascii.unhexlify (secret [1])
            decr = crypto.Decrypt (key=key)
        else:
            raise RuntimeError

    tarobj = \
        TarFile.open_at_offset (offset,
                                mode=mode,
                                fileobj=fileobj,
                                format=GNU_FORMAT,
                                ignore_zeros=True,
                                encryption=decr,
                                save_to_members=False,
                                tolerance=TOLERANCE_RESCUE)

    return tarobj.next ()
def idxent_of_tarinfo (tarinfo):
    """
    Scrape the information relevant for the index from a *TarInfo* object.
    Keys like the inode number that lack a corresponding field in a TarInfo
    will be set to some neutral value.

    Example output:

        { "inode"  : 0
        , "uid"    : 0
        , "gid"    : 0
        , "path"   : "snapshot://annotations.db"
        , "offset" : 0
        , "volume" : 0
        , "mode"   : 33152
        , "ctime"  : 1502798115
        , "mtime"  : 1502196423
        , "size"   : 144
        , "type"   : "file"
        }
    """
    return \
        { "inode"  : 0                      # ignored when reading the index
        , "uid"    : tarinfo.uid
        , "gid"    : tarinfo.gid
        , "path"   : tarinfo.name           # keeping URI scheme
        , "offset" : 0                      # to be added by the caller
        , "volume" : tarinfo.volume_offset
        , "mode"   : tarinfo.mode
        , "ctime"  : tarinfo.mtime          # TarInfo has no ctime; reuse mtime
        , "mtime"  : tarinfo.mtime
        , "size"   : tarinfo.size
        , "type"   : tarinfo.type
        }
def gen_rescue_index (backup_tar_path, mode, password=None, key=None):
    """
    Rebuild a pseudo index for an encrypted backup by scanning object
    offsets with the crypto layer and reading one tar header at each.

    Exactly one of ``password`` / ``key`` selects the secret; with neither,
    an empty index is returned.
    """
    psidx   = [] # pseudo index, return value
    offsets = None
    secret  = None

    if password is not None:
        secret = (crypto.PDTCRYPT_SECRET_PW, password)
    elif key is not None:
        secret = (crypto.PDTCRYPT_SECRET_KEY, key)

    if secret is not None:
        offsets = crypto.reconstruct_offsets (backup_tar_path, secret)
        fileobj = bltn_open (backup_tar_path, "rb")
        infos   = [ (off, read_tarobj_at_offset (fileobj, off, mode, secret=secret))
                    for off in offsets ]
        def aux (o, ti):
            # Attach the physical offset the caller cannot know.
            ie = idxent_of_tarinfo (ti)
            ie ["offset"] = o
            return ie
        psidx   = [ aux (o, ti) for o, ti in infos ]

    return psidx
3397 #--------------------
3398 # exported functions
3399 #--------------------
3400 def is_tarfile(name):
3401 """Return True if name points to a tar archive that we
3402 are able to handle, else return False.