initial port to python 3, not finished
author: Eduardo Robles Elvira <edulix@wadobo.com>
Mon, 4 Nov 2013 07:50:55 +0000 (08:50 +0100)
committer: Eduardo Robles Elvira <edulix@wadobo.com>
Mon, 4 Nov 2013 07:50:55 +0000 (08:50 +0100)
12 files changed:
deltatar/aescrypto.py
deltatar/deltatar.py
deltatar/tarfile.py
filesplit.py
rescue_tar.py
runtests.py [changed mode: 0644->0755]
testing/__init__.py
testing/test_concat_compress.py
testing/test_deltatar.py
testing/test_encryption.py
testing/test_multivol.py
testing/test_rescue_tar.py

index bb8cbf8..2dce10c 100644 (file)
@@ -1,20 +1,30 @@
-#!/usr/bin/env python
-
+#!/usr/bin/env python3
+#-------------------------------------------------------------------
+# aescrypto.py
+#-------------------------------------------------------------------
 # Copyright (C) 2013 Intra2net AG
+# All rights reserved.
 #
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published
-# by the Free Software Foundation; either version 3 of the License, or
-# (at your option) any later version.
+# Permission  is  hereby granted,  free  of charge,  to  any person
+# obtaining a  copy of  this software  and associated documentation
+# files  (the  "Software"),  to   deal  in  the  Software   without
+# restriction,  including  without limitation  the  rights to  use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies  of  the  Software,  and to  permit  persons  to  whom the
+# Software  is  furnished  to  do  so,  subject  to  the  following
+# conditions:
 #
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
+# The above copyright  notice and this  permission notice shall  be
+# included in all copies or substantial portions of the Software.
 #
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see
-# <http://www.gnu.org/licenses/lgpl-3.0.html>
+# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
+# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
+# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
+# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
+# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
+# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
 
 # Author: Daniel Garcia <danigm@wadobo.com>
 
@@ -30,11 +40,14 @@ from hashlib import md5
 # we ignore the PowmInsecureWarning warning given by libgmp4 because it doesn't
 # affect our code
 import warnings
-from Crypto.pct_warnings import PowmInsecureWarning
-warnings.simplefilter("ignore", PowmInsecureWarning)
+try:
+    from Crypto.pct_warnings import PowmInsecureWarning
+    warnings.simplefilter("ignore", PowmInsecureWarning)
 
-from Crypto.Cipher import AES
-from Crypto import Random
+    from Crypto.Cipher import AES
+    from Crypto import Random
+except:
+    pass
 
 
 class AESCrypt:
@@ -42,17 +55,19 @@ class AESCrypt:
     This class provides a simple method to encrypt and decrypt text using
     AES.
     '''
-    def __init__(self, password, salt='', key_length=128):
+    def __init__(self, password, salt=b'', key_length=128):
         self.bs = AES.block_size
         self.mode = AES.MODE_CBC
         if key_length not in [128, 256]:
             raise Exception('Invalid key_length, only 128 and 256 allowed')
-        self.key_length = key_length/8
-        self.buf = ''
+        self.key_length = int(key_length/8)
+        self.buf = b''
         if salt:
             self.salt = salt
         else:
-            self.salt = Random.new().read(self.bs - len('Salted__'))
+            self.salt = Random.new().read(self.bs - len(b'Salted__'))
+        if isinstance(password, str):
+            password = bytes(password, 'UTF-8')
         self.password = password
 
         self.get_pad = self.get_random_pad
@@ -68,7 +83,7 @@ class AESCrypt:
         '''
         self.derive_key_and_iv()
         self.cipher = AES.new(self.key, self.mode, self.iv)
-        self.salt_str = 'Salted__' + self.salt
+        self.salt_str = b'Salted__' + self.salt
 
     def close_enc(self):
         '''
@@ -78,7 +93,7 @@ class AESCrypt:
         returns the encrypted text
         '''
         chunk = self.buf
-        self.buf = ''
+        self.buf = b''
         need_padding = len(chunk) % self.bs != 0
         padding_length = self.bs - len(chunk) % self.bs
         chunk += self.get_pad(padding_length)
@@ -94,10 +109,10 @@ class AESCrypt:
         self.buf += chunk
         if len(self.buf) % self.bs == 0:
             cipher = self.cipher.encrypt(self.buf)
-            self.buf = ''
+            self.buf = b''
             return cipher
 
-        cipher = ''
+        cipher = b''
         while len(self.buf) >= self.bs:
             chunk = self.buf[:self.bs]
             self.buf = self.buf[self.bs:]
@@ -128,19 +143,19 @@ class AESCrypt:
         '''
         Calculates the salt for an input encrypted file
         '''
-        self.salt = instream.read(self.bs)[len('Salted__'):]
+        self.salt = instream.read(self.bs)[len(b'Salted__'):]
 
     def get_salt_str(self, instr):
         '''
         Calculates the salt for an input encrypted string
         '''
-        self.salt = instr[len('Salted__'):self.bs]
+        self.salt = instr[len(b'Salted__'):self.bs]
 
     def derive_key_and_iv(self):
         '''
         Generates the key and iv using the password and salt as seed
         '''
-        d = d_i = ''
+        d = d_i = b''
         l = self.key_length + self.bs
         while len(d) < l:
             d_i = md5(d_i + self.password + self.salt).digest()
@@ -152,19 +167,19 @@ class AESCrypt:
         '''
         Returns an ISO_10126 pad, which is random
         '''
-        return Random.new().read(padding_length - 1) + chr(padding_length)
+        return Random.new().read(padding_length - 1) + bytes([padding_length])
 
     def split_random_pad(self, chunk):
         '''
         Returns the chunk without the ISO_10126 pad
         '''
-        return chunk[:-ord(chunk[-1])]
+        return chunk[:-chunk[-1]]
 
     def get_pkcs5_pad(self, padding_length):
         '''
         Returns the PKCS pad
         '''
-        return padding_length * chr(padding_length)
+        return padding_length * bytes([padding_length])
 
     def split_pkcs5_pad(self, chunk):
         '''
@@ -195,7 +210,7 @@ def decrypt(in_file, out_file, password):
     salt = aes.get_salt(in_file)
     aes.init()
 
-    next_chunk = ''
+    next_chunk = b''
     finished = False
     while not finished:
         buf = in_file.read(1024 * aes.bs)
@@ -215,4 +230,4 @@ if __name__ == '__main__':
     cipher.seek(0)
     decrypt(cipher, out, 'key')
     out.seek(0)
-    print out.read()
+    print(out.read())
index 1875df5..b19afa8 100644 (file)
@@ -277,7 +277,7 @@ class DeltaTar(object):
             match = NO_MATCH
             for i in self.included_files:
                 # it can be either a regexp or a string
-                if isinstance(i, basestring):
+                if isinstance(i, str):
                     # if the string matches, then continue
                     if i == path:
                         match = MATCH
@@ -322,7 +322,7 @@ class DeltaTar(object):
         if match != PARENT_MATCH:
             for e in self.excluded_files:
                 # it can be either a regexp or a string
-                if isinstance(e, basestring):
+                if isinstance(e, str):
                     # if the string matches, then exclude
                     if e == path:
                         return NO_MATCH
@@ -404,7 +404,7 @@ class DeltaTar(object):
 
         return {
             u'type': ptype,
-            u'path': unicode(path),
+            u'path': path,
             u'mode': mode,
             u'mtime': int(stinfo.st_mtime),
             u'ctime': int(stinfo.st_ctime),
@@ -506,10 +506,10 @@ class DeltaTar(object):
           backup in volumes. Optional (won't split in volumes by default).
         '''
         # check input
-        if not isinstance(source_path, basestring):
+        if not isinstance(source_path, str):
             raise Exception('Source path must be a string')
 
-        if not isinstance(backup_path, basestring):
+        if not isinstance(backup_path, str):
             raise Exception('Backup path must be a string')
 
         if not os.path.exists(source_path) or not os.path.isdir(source_path):
@@ -575,9 +575,9 @@ class DeltaTar(object):
         # wraps some args from context into the handler
         new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
 
-        index_fd.write('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full" }\n')
+        index_fd.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full" }\n', 'UTF-8'))
 
-        s = '{"type": "BEGIN-FILE-LIST"}\n'
+        s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
         # calculate checksum and write into the stream
         crc = binascii.crc32(s)
         index_fd.write(s)
@@ -609,15 +609,15 @@ class DeltaTar(object):
             statd['offset'] = tarobj.get_last_member_offset()
 
             # store in the index the stat dict
-            s = json.dumps(statd) + '\n'
+            s = bytes(json.dumps(statd) + '\n', 'UTF-8')
             crc = binascii.crc32(s, crc) & 0xffffffff
             index_fd.write(s)
 
-        s = '{"type": "END-FILE-LIST"}\n'
+        s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
         crc = binascii.crc32(s, crc) & 0xffffffff
         index_fd.write(s)
-        index_fd.write('{"type": "file-list-checksum", "checksum": %d}\n' %\
-                        crc)
+        s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
+        index_fd.write(s)
         index_fd.close()
         os.chdir(cwd)
         tarobj.close()
@@ -640,10 +640,10 @@ class DeltaTar(object):
         the index_mode setup in the constructor.
         '''
         # check/sanitize input
-        if not isinstance(source_path, basestring):
+        if not isinstance(source_path, str):
             raise Exception('Source path must be a string')
 
-        if not isinstance(backup_path, basestring):
+        if not isinstance(backup_path, str):
             raise Exception('Backup path must be a string')
 
         if not os.path.exists(source_path) or not os.path.isdir(source_path):
@@ -659,7 +659,7 @@ class DeltaTar(object):
         if max_volume_size != None:
             max_volume_size = max_volume_size*1024*1024
 
-        if not isinstance(previous_index_path, basestring):
+        if not isinstance(previous_index_path, str):
             raise Exception('previous_index_path must be A string')
 
         if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
@@ -721,9 +721,9 @@ class DeltaTar(object):
         # wraps some args from context into the handler
         new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
 
-        index_fd.write('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff" }\n')
+        index_fd.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff" }\n', 'UTF-8'))
 
-        s = '{"type": "BEGIN-FILE-LIST"}\n'
+        s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
         # calculate checksum and write into the stream
         crc = binascii.crc32(s)
         index_fd.write(s)
@@ -784,7 +784,7 @@ class DeltaTar(object):
             if action == 'snapshot':
                 # calculate stat dict for current file
                 stat = dpath.copy()
-                stat['path'] = u"snapshot://" + dpath['path']
+                stat['path'] = "snapshot://" + dpath['path']
                 stat['volume'] = self.vol_no
 
                 # backup file
@@ -794,7 +794,7 @@ class DeltaTar(object):
                 stat['offset'] = tarobj.get_last_member_offset()
 
                 # store in the index the stat dict
-                s = json.dumps(stat) + '\n'
+                s = bytes(json.dumps(stat) + '\n', 'UTF-8')
                 crc = binascii.crc32(s, crc) & 0xffffffff
                 index_fd.write(s)
             elif action == 'delete':
@@ -807,7 +807,7 @@ class DeltaTar(object):
                 tarobj.add("/dev/null", arcname=stat['path'])
 
                 # store in the index the stat dict
-                s = json.dumps(stat) + '\n'
+                s = bytes(json.dumps(stat) + '\n', 'UTF-8')
                 crc = binascii.crc32(s, crc) & 0xffffffff
                 index_fd.write(s)
             elif action == 'list':
@@ -816,15 +816,15 @@ class DeltaTar(object):
                 # unchanged files do not enter in the backup, only in the index
 
                 # store in the index the stat dict
-                s = json.dumps(stat) + '\n'
+                s = bytes(json.dumps(stat) + '\n', 'UTF-8')
                 crc = binascii.crc32(s, crc) & 0xffffffff
                 index_fd.write(s)
 
-        s = '{"type": "END-FILE-LIST"}\n'
+        s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
         crc = binascii.crc32(s, crc) & 0xffffffff
         index_fd.write(s)
-        index_fd.write('{"type": "file-list-checksum", "checksum": %d}\n' %\
-                        crc)
+        s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
+        index_fd.write(s)
         index_fd.close()
         index_it.release()
         os.chdir(cwd)
@@ -879,13 +879,13 @@ class DeltaTar(object):
                     self.f.close()
                 self.f = None
 
-            def next(self):
+            def __next__(self):
                 # read each file in the index and process it to do the retore
                 j = {}
                 l_no = -1
                 try:
                     j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
-                except Exception, e:
+                except Exception as e:
                     if self.f:
                         self.f.close()
                     raise e
@@ -903,7 +903,7 @@ class DeltaTar(object):
                     self.delta_tar.logger.warn('unrecognized type to be '
                                         'restored: %s, line %d' % (op_type, l_no))
                     # iterate again
-                    return self.next()
+                    return self.__next__()
 
                 return j, l_no
 
@@ -951,11 +951,11 @@ class DeltaTar(object):
                     self.tar_obj.close()
                 self.tar_obj = None
 
-            def next(self):
+            def __next__(self):
                 '''
                 Read each member and return it as a stat dict
                 '''
-                tarinfo = self.tar_obj.next()
+                tarinfo = self.tar_obj.__iter__().__next__()
                 if not tarinfo or tarinfo == self.last_member:
                     raise StopIteration
 
@@ -993,7 +993,7 @@ class DeltaTar(object):
         '''
         while True:
             try:
-                path = iter.next()
+                path = iter.__next__()
                 if strip == 0:
                     yield self._stat_dict(path)
                 else:
@@ -1016,7 +1016,7 @@ class DeltaTar(object):
         while True:
             if not elem1:
                 try:
-                    elem1, l_no = it1.next()
+                    elem1, l_no = it1.__next__()
                 except StopIteration:
                     if elem2:
                         yield (None, elem2, l_no)
@@ -1027,7 +1027,7 @@ class DeltaTar(object):
                     break
             if not elem2:
                 try:
-                    elem2 = it2.next()
+                    elem2 = it2.__next__()
                     if isinstance(elem2, tuple):
                         elem2 = elem2[0]
                 except StopIteration:
@@ -1075,7 +1075,7 @@ class DeltaTar(object):
         return (index1, index2)
 
     def list_backup(self, backup_tar_path, list_func=None):
-        if not isinstance(backup_tar_path, basestring):
+        if not isinstance(backup_tar_path, str):
             raise Exception('Backup tar path must be a string')
 
         if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
@@ -1114,7 +1114,7 @@ class DeltaTar(object):
 
         def filter(cls, list_func, tarinfo):
             if list_func is None:
-                print tarinfo.path
+                print(tarinfo.path)
             else:
                 list_func(tarinfo)
             return False
@@ -1148,7 +1148,7 @@ class DeltaTar(object):
         specified in the constructor.
         '''
         # check/sanitize input
-        if not isinstance(target_path, basestring):
+        if not isinstance(target_path, str):
             raise Exception('Target path must be a string')
 
         if backup_indexes_paths is None and backup_tar_path == []:
@@ -1160,7 +1160,7 @@ class DeltaTar(object):
             mode = "diff"
 
         if mode == "tar":
-            if not isinstance(backup_tar_path, basestring):
+            if not isinstance(backup_tar_path, str):
                 raise Exception('Backup tar path must be a string')
 
             if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
@@ -1179,7 +1179,7 @@ class DeltaTar(object):
                     ' the open mode you provided is "%s"' % self.mode)
 
             for index in backup_indexes_paths:
-                if not isinstance(index, basestring):
+                if not isinstance(index, str):
                     raise Exception('indices must be strings')
 
                 if not os.path.exists(index) or not os.path.isfile(index):
@@ -1238,8 +1238,8 @@ class DeltaTar(object):
                     continue
                 try:
                     helper.restore(ipath, l_no)
-                except Exception, e:
-                    print "FAILED to restore: ", ipath.get('path', '')
+                except Exception as e:
+                    print("FAILED to restore: ", ipath.get('path', ''))
                 continue
 
             # if both files are equal, we have nothing to restore
@@ -1273,8 +1273,8 @@ class DeltaTar(object):
         l = f.readline()
         l_no += 1
         try:
-            j = json.loads(l)
-        except ValueError, e:
+            j = json.loads(l.decode('UTF-8'))
+        except ValueError as e:
             raise Exception("error parsing this json line "
                 "(line number %d): %s" % (l_no, l))
         return j, l_no
@@ -1467,7 +1467,7 @@ class RestoreHelper(object):
         # found in one index and we have to go to the next index.
         if data['iterator'] is None:
             it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
-            d, l_no = it.next()
+            d, l_no = it.__next__()
         else:
             it = data['iterator']
             d = data['last_itelement']
@@ -1490,7 +1490,7 @@ class RestoreHelper(object):
                 return None, 0, ''
 
             try:
-                d, l_no = it.next()
+                d, l_no = it.__next__()
             except StopIteration:
                 data['last_itelement'] = d
                 data['last_lno'] = l_no
@@ -1533,7 +1533,7 @@ class RestoreHelper(object):
                     except EnvironmentError:
                         raise tarfile.ExtractError("could not change owner")
 
-            except tarfile.ExtractError, e:
+            except tarfile.ExtractError as e:
                 self._deltatar.logger.warn('tarfile: %s' % e)
 
     @staticmethod
@@ -1576,7 +1576,7 @@ class RestoreHelper(object):
                 vol_path = os.path.join(backup_path, vol_name)
                 if index_data['vol_fd']:
                     index_data['vol_fd'].close()
-                index_data['vol_fd'] = open(vol_path, 'r')
+                index_data['vol_fd'] = open(vol_path, 'rb')
 
                 # force reopen of the tarobj because of new volume
                 if index_data['tarobj']:
@@ -1586,7 +1586,7 @@ class RestoreHelper(object):
             # seek tarfile if needed
             offset = file_data.get('offset', -1)
             if index_data['tarobj']:
-                member = index_data['tarobj'].next()
+                member = index_data['tarobj'].__iter__().__next__()
                 if not member or member.path != file_data['path']:
                     # force a seek and reopen
                     index_data['tarobj'].close()
@@ -1603,7 +1603,7 @@ class RestoreHelper(object):
                     new_volume_handler=index_data['new_volume_handler'],
                     save_to_members=False)
 
-                member = index_data['tarobj'].next()
+                member = index_data['tarobj'].__iter__().__next__()
 
         member.path = unprefixed_path
         member.name = unprefixed_path
@@ -1611,7 +1611,7 @@ class RestoreHelper(object):
         if op_type == 'directory':
             self.add_member_dir(member)
             member = copy.copy(member)
-            member.mode = 0700
+            member.mode = 0o0700
 
             # if it's an existing directory, we then don't need to recreate it
             # just set the right permissions, mtime and that kind of stuff
@@ -1640,5 +1640,5 @@ class RestoreHelper(object):
 
 class DirItem(object):
     def __init__(self, **kwargs):
-        for k, v in kwargs.iteritems():
+        for k, v in kwargs.items():
             setattr(self, k, v)
\ No newline at end of file
index 5dcfe9d..da65708 100644 (file)
@@ -1,5 +1,4 @@
-#!/usr/bin/env python
-# -*- coding: iso-8859-1 -*-
+#!/usr/bin/env python3
 #-------------------------------------------------------------------
 # tarfile.py
 #-------------------------------------------------------------------
@@ -44,6 +43,7 @@ __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend, Eduardo Robl
 #---------
 import sys
 import os
+import io
 import shutil
 import stat
 import errno
@@ -53,48 +53,59 @@ import copy
 import re
 import operator
 
-import aescrypto
+from . import aescrypto
 
 try:
     import grp, pwd
 except ImportError:
     grp = pwd = None
 
+# os.symlink on Windows prior to 6.0 raises NotImplementedError
+symlink_exception = (AttributeError, NotImplementedError)
+try:
+    # OSError (winerror=1314) will be raised if the caller does not hold the
+    # SeCreateSymbolicLinkPrivilege privilege
+    symlink_exception += (OSError,)
+except NameError:
+    pass
+
 # from tarfile import *
 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
 
+from builtins import open as _open # Since 'open' is TarFile.open
+
 #---------------------------------------------------------
 # tar constants
 #---------------------------------------------------------
-NUL = "\0"                      # the null character
+NUL = b"\0"                     # the null character
 BLOCKSIZE = 512                 # length of processing blocks
 RECORDSIZE = BLOCKSIZE * 20     # length of records
-GNU_MAGIC = "ustar  \0"         # magic gnu tar string
-POSIX_MAGIC = "ustar\x0000"     # magic posix tar string
+GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
+POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string
 
 LENGTH_NAME = 100               # maximum length of a filename
 LENGTH_LINK = 100               # maximum length of a linkname
 LENGTH_PREFIX = 155             # maximum length of the prefix field
 
-REGTYPE = "0"                   # regular file
-AREGTYPE = "\0"                 # regular file
-LNKTYPE = "1"                   # link (inside tarfile)
-SYMTYPE = "2"                   # symbolic link
-CHRTYPE = "3"                   # character special device
-BLKTYPE = "4"                   # block special device
-DIRTYPE = "5"                   # directory
-FIFOTYPE = "6"                  # fifo special device
-CONTTYPE = "7"                  # contiguous file
-
-GNUTYPE_LONGNAME = "L"          # GNU tar longname
-GNUTYPE_LONGLINK = "K"          # GNU tar longlink
-GNUTYPE_SPARSE = "S"            # GNU tar sparse file
-GNUTYPE_MULTIVOL = "M"          # GNU tar continuation of a file that began on
+REGTYPE = b"0"                  # regular file
+AREGTYPE = b"\0"                # regular file
+LNKTYPE = b"1"                  # link (inside tarfile)
+SYMTYPE = b"2"                  # symbolic link
+CHRTYPE = b"3"                  # character special device
+BLKTYPE = b"4"                  # block special device
+DIRTYPE = b"5"                  # directory
+FIFOTYPE = b"6"                 # fifo special device
+CONTTYPE = b"7"                 # contiguous file
+
+GNUTYPE_LONGNAME = b"L"         # GNU tar longname
+GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
+GNUTYPE_SPARSE = b"S"           # GNU tar sparse file
+GNUTYPE_MULTIVOL = b"M"         # GNU tar continuation of a file that began on
                                 # another volume
 
-XHDTYPE = "x"                   # POSIX.1-2001 extended header
-XGLTYPE = "g"                   # POSIX.1-2001 global header
-SOLARIS_XHDTYPE = "X"           # Solaris extended header
+XHDTYPE = b"x"                  # POSIX.1-2001 extended header
+XGLTYPE = b"g"                  # POSIX.1-2001 global header
+SOLARIS_XHDTYPE = b"X"          # Solaris extended header
 
 USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
 GNU_FORMAT = 1                  # GNU tar format
@@ -123,6 +134,9 @@ GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
               "uid", "gid", "uname", "gname")
 
+# Fields from a pax header that are affected by hdrcharset.
+PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
+
 # Fields in a pax header that are numbers, all other fields
 # are treated as strings.
 PAX_NUMBER_FIELDS = {
@@ -135,69 +149,57 @@ PAX_NUMBER_FIELDS = {
 }
 
 #---------------------------------------------------------
-# Bits used in the mode field, values in octal.
-#---------------------------------------------------------
-S_IFLNK = 0120000        # symbolic link
-S_IFREG = 0100000        # regular file
-S_IFBLK = 0060000        # block device
-S_IFDIR = 0040000        # directory
-S_IFCHR = 0020000        # character device
-S_IFIFO = 0010000        # fifo
-
-TSUID   = 04000          # set UID on execution
-TSGID   = 02000          # set GID on execution
-TSVTX   = 01000          # reserved
-
-TUREAD  = 0400           # read by owner
-TUWRITE = 0200           # write by owner
-TUEXEC  = 0100           # execute/search by owner
-TGREAD  = 0040           # read by group
-TGWRITE = 0020           # write by group
-TGEXEC  = 0010           # execute/search by group
-TOREAD  = 0004           # read by other
-TOWRITE = 0002           # write by other
-TOEXEC  = 0001           # execute/search by other
-
-#---------------------------------------------------------
 # initialization
 #---------------------------------------------------------
-ENCODING = sys.getfilesystemencoding()
-if ENCODING is None:
-    ENCODING = sys.getdefaultencoding()
+
+if os.name in ("nt", "ce"):
+    ENCODING = "utf-8"
+else:
+    ENCODING = sys.getfilesystemencoding()
 
 #---------------------------------------------------------
 # Some useful functions
 #---------------------------------------------------------
 
-def stn(s, length):
-    """Convert a python string to a null-terminated string buffer.
+def stn(s, length, encoding, errors):
+    """Convert a string to a null-terminated bytes object.
     """
+    s = s.encode(encoding, errors)
     return s[:length] + (length - len(s)) * NUL
 
-def nts(s):
-    """Convert a null-terminated string field to a python string.
+def nts(s, encoding, errors):
+    """Convert a null-terminated bytes object to a string.
     """
-    # Use the string up to the first null char.
-    p = s.find("\0")
-    if p == -1:
-        return s
-    return s[:p]
+    p = s.find(b"\0")
+    if p != -1:
+        s = s[:p]
+    return s.decode(encoding, errors)
+
+def sbtn(s, length, encoding, errors):
+    """Convert a string or a bunch of bytes to a null-terminated bytes object
+    of specific size.
+    """
+    if isinstance(s, str):
+        s = s.encode(encoding, errors)
+    return s[:length] + (length - len(s)) * NUL
 
 def nti(s):
     """Convert a number field to a python number.
     """
     # There are two possible encodings for a number field, see
     # itn() below.
-    if s[0] != chr(0200):
+    if s[0] in (0o200, 0o377):
+        n = 0
+        for i in range(len(s) - 1):
+            n <<= 8
+            n += s[i + 1]
+        if s[0] == 0o377:
+            n = -(256 ** (len(s) - 1) - n)
+    else:
         try:
-            n = int(nts(s) or "0", 8)
+            n = int(nts(s, "ascii", "strict") or "0", 8)
         except ValueError:
             raise InvalidHeaderError("invalid header")
-    else:
-        n = 0L
-        for i in xrange(len(s) - 1):
-            n <<= 8
-            n += ord(s[i + 1])
     return n
 
 def itn(n, digits=8, format=DEFAULT_FORMAT):
@@ -206,46 +208,27 @@ def itn(n, digits=8, format=DEFAULT_FORMAT):
     # POSIX 1003.1-1988 requires numbers to be encoded as a string of
     # octal digits followed by a null-byte, this allows values up to
     # (8**(digits-1))-1. GNU tar allows storing numbers greater than
-    # that if necessary. A leading 0200 byte indicates this particular
-    # encoding, the following digits-1 bytes are a big-endian
-    # representation. This allows values up to (256**(digits-1))-1.
+    # that if necessary. A leading 0o200 or 0o377 byte indicate this
+    # particular encoding, the following digits-1 bytes are a big-endian
+    # base-256 representation. This allows values up to (256**(digits-1))-1.
+    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
+    # number.
     if 0 <= n < 8 ** (digits - 1):
-        s = "%0*o" % (digits - 1, n) + NUL
-    else:
-        if format != GNU_FORMAT or n >= 256 ** (digits - 1):
-            raise ValueError("overflow in number field")
-
-        if n < 0:
-            # XXX We mimic GNU tar's behaviour with negative numbers,
-            # this could raise OverflowError.
-            n = struct.unpack("L", struct.pack("l", n))[0]
+        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
+    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
+        if n >= 0:
+            s = bytearray([0o200])
+        else:
+            s = bytearray([0o377])
+            n = 256 ** digits + n
 
-        s = ""
-        for i in xrange(digits - 1):
-            s = chr(n & 0377) + s
+        for i in range(digits - 1):
+            s.insert(1, n & 0o377)
             n >>= 8
-        s = chr(0200) + s
-    return s
-
-def uts(s, encoding, errors):
-    """Convert a unicode object to a string.
-    """
-    if errors == "utf-8":
-        # An extra error handler similar to the -o invalid=UTF-8 option
-        # in POSIX.1-2001. Replace untranslatable characters with their
-        # UTF-8 representation.
-        try:
-            return s.encode(encoding, "strict")
-        except UnicodeEncodeError:
-            x = []
-            for c in s:
-                try:
-                    x.append(c.encode(encoding, "strict"))
-                except UnicodeEncodeError:
-                    x.append(c.encode("utf8"))
-            return "".join(x)
     else:
-        return s.encode(encoding, errors)
+        raise ValueError("overflow in number field")
+
+    return s
 
 def calc_chksums(buf):
     """Calculate the checksum for a member's header by summing up all
@@ -256,8 +239,8 @@ def calc_chksums(buf):
        the high bit set. So we calculate two checksums, unsigned and
        signed.
     """
-    unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
-    signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
+    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
+    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
     return unsigned_chksum, signed_chksum
 
 def copyfileobj(src, dst, length=None):
@@ -272,60 +255,25 @@ def copyfileobj(src, dst, length=None):
 
     BUFSIZE = 16 * 1024
     blocks, remainder = divmod(length, BUFSIZE)
-    for b in xrange(blocks):
+    for b in range(blocks):
         buf = src.read(BUFSIZE)
         dst.write(buf)
         if len(buf) < BUFSIZE:
-            raise IOError("end of file reached")
+            raise OSError("end of file reached")
 
     if remainder != 0:
         buf = src.read(remainder)
         dst.write(buf)
         if len(buf) < remainder:
-            raise IOError("end of file reached")
+            raise OSError("end of file reached")
     return
 
-filemode_table = (
-    ((S_IFLNK,      "l"),
-     (S_IFREG,      "-"),
-     (S_IFBLK,      "b"),
-     (S_IFDIR,      "d"),
-     (S_IFCHR,      "c"),
-     (S_IFIFO,      "p")),
-
-    ((TUREAD,       "r"),),
-    ((TUWRITE,      "w"),),
-    ((TUEXEC|TSUID, "s"),
-     (TSUID,        "S"),
-     (TUEXEC,       "x")),
-
-    ((TGREAD,       "r"),),
-    ((TGWRITE,      "w"),),
-    ((TGEXEC|TSGID, "s"),
-     (TSGID,        "S"),
-     (TGEXEC,       "x")),
-
-    ((TOREAD,       "r"),),
-    ((TOWRITE,      "w"),),
-    ((TOEXEC|TSVTX, "t"),
-     (TSVTX,        "T"),
-     (TOEXEC,       "x"))
-)
-
 def filemode(mode):
-    """Convert a file's mode to a string of the form
-       -rwxrwxrwx.
-       Used by TarFile.list()
-    """
-    perm = []
-    for table in filemode_table:
-        for bit, char in table:
-            if mode & bit == bit:
-                perm.append(char)
-                break
-        else:
-            perm.append("-")
-    return "".join(perm)
+    """Deprecated in this location; use stat.filemode."""
+    import warnings
+    warnings.warn("deprecated in favor of stat.filemode",
+                  DeprecationWarning, 2)
+    return stat.filemode(mode)
 
 class TarError(Exception):
     """Base exception."""
@@ -334,7 +282,7 @@ class ExtractError(TarError):
     """General exception for extract errors."""
     pass
 class ReadError(TarError):
-    """Exception for unreadble tar archives."""
+    """Exception for unreadable tar archives."""
     pass
 class CompressionError(TarError):
     """Exception for unavailable compression methods."""
@@ -377,7 +325,7 @@ class _LowLevelFile:
         }[mode]
         if hasattr(os, "O_BINARY"):
             _mode |= os.O_BINARY
-        self.fd = os.open(name, _mode, 0666)
+        self.fd = os.open(name, _mode, 0o666)
         self.offset = 0
 
     def close(self):
@@ -427,52 +375,77 @@ class _Stream:
         self.comptype = comptype
         self.fileobj  = fileobj
         self.bufsize  = bufsize
-        self.buf      = ""
-        self.pos      = 0L
-        self.concat_pos = 0L
+        self.buf      = b""
+        self.pos      = 0
+        self.concat_pos = 0
         self.closed   = False
-        self.flags    = 0L
-        self.internal_pos = 0L
+        self.flags    = 0
+        self.internal_pos = 0
         self.concat_stream = concat_stream
         self.enctype  = enctype
         self.key_length = key_length
         self.password = password
-        self.last_block_offset = 0L
-        self.dbuf     = ""
-        self.aes_buf  = ""
+        self.last_block_offset = 0
+        self.dbuf     = b""
+        self.aes_buf  = b""
+        self.exception = None
 
-        if comptype == "gz":
-            try:
-                import zlib
-            except ImportError:
-                raise CompressionError("zlib module is not available")
-            self.zlib = zlib
-            if mode == "r":
-                if self.enctype == 'aes':
-                    self.encryption = aescrypto.AESCrypt(self.password,
-                                                         key_length=self.key_length)
-                self._init_read_gz()
-            else:
-                self._init_write_gz()
-            self.crc = zlib.crc32("") & 0xffffffffL
+        try:
+            if comptype == "gz":
+                try:
+                    import zlib
+                except ImportError:
+                    raise CompressionError("zlib module is not available")
+                self.zlib = zlib
+                if mode == "r":
+                    if self.enctype == 'aes':
+                        self.encryption = aescrypto.AESCrypt(self.password,
+                                                            key_length=self.key_length)
+                    self._init_read_gz()
+                    self.exception = zlib.error
+                else:
+                    self._init_write_gz()
+                self.crc = zlib.crc32(b"")
 
-        elif comptype == "bz2":
-            try:
-                import bz2
-            except ImportError:
-                raise CompressionError("bz2 module is not available")
-            if mode == "r":
-                self.dbuf = ""
-                self.cmp = bz2.BZ2Decompressor()
-            else:
-                self.cmp = bz2.BZ2Compressor()
+            elif comptype == "bz2":
+                try:
+                    import bz2
+                except ImportError:
+                    raise CompressionError("bz2 module is not available")
+                if mode == "r":
+                    self.dbuf = b""
+                    self.cmp = bz2.BZ2Decompressor()
+                    self.exception = OSError
+                else:
+                    self.cmp = bz2.BZ2Compressor()
 
-        elif self.enctype == 'aes':
-            self.encryption = aescrypto.AESCrypt(self.password,
-                                                key_length=self.key_length)
-            if mode != "r":
-                self.encryption.init()
-                self.__write_to_file(self.encryption.salt_str)
+            elif comptype == 'xz':
+                    try:
+                        import lzma
+                    except ImportError:
+                        raise CompressionError("lzma module is not available")
+                    if mode == "r":
+                        self.dbuf = b""
+                        self.cmp = lzma.LZMADecompressor()
+                        self.exception = lzma.LZMAError
+                    else:
+                        self.cmp = lzma.LZMACompressor()
+
+            elif self.enctype == 'aes':
+                self.encryption = aescrypto.AESCrypt(self.password,
+                                                    key_length=self.key_length)
+                if mode != "r":
+                    self.encryption.init()
+                    self.__write_to_file(self.encryption.salt_str)
+
+            elif comptype != "tar":
+                raise CompressionError("unknown compression type %r" % comptype)
+
+        except:
+            if not self._extfileobj:
+                self.fileobj.close()
+            self.closed = True
+            raise
 
     def __del__(self):
         if hasattr(self, "closed") and not self.closed:
@@ -493,14 +466,15 @@ class _Stream:
             self.encryption.init()
             self.__write_to_file(self.encryption.salt_str)
 
-        timestamp = struct.pack("<L", long(time.time()))
-        self.__write("\037\213\010\010%s\002\377" % timestamp)
-        if type(self.name) is unicode:
+        timestamp = struct.pack("<L", int(time.time()))
+        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
+        if type(self.name) is str:
             self.name = self.name.encode("iso-8859-1", "replace")
-        if self.name.endswith(".aes128") or self.name.endswith(".aes256"):
+        if self.name.endswith(b".aes128") or self.name.endswith(b".aes256"):
             self.name = self.name[:-7]
-        if self.name.endswith(".gz"):
+        if self.name.endswith(b".gz"):
             self.name = self.name[:-3]
+        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
         self.__write(self.name + NUL)
 
     def new_compression_block(self):
@@ -522,8 +496,8 @@ class _Stream:
         '''
         self.close(close_fileobj=False)
         self.closed = False
-        self.concat_pos = 0L
-        self.crc = self.zlib.crc32("") & 0xffffffffL
+        self.concat_pos = 0
+        self.crc = self.zlib.crc32(b"")
         self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                             -self.zlib.MAX_WBITS,
                                             self.zlib.DEF_MEM_LEVEL,
@@ -535,8 +509,8 @@ class _Stream:
         elif set_last_block_offset:
             self.last_block_offset = self.fileobj.tell()
 
-        timestamp = struct.pack("<L", long(time.time()))
-        self.__write("\037\213\010\000%s\002\377" % timestamp)
+        timestamp = struct.pack("<L", int(time.time()))
+        self.__write(b"\037\213\010\000" + timestamp + b"\002\377")
 
 
     def _new_aes_block(self, set_last_block_offset=False):
@@ -558,7 +532,7 @@ class _Stream:
         """Write string s to the stream.
         """
         if self.comptype == "gz":
-            self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
+            self.crc = self.zlib.crc32(s, self.crc)
         self.pos += len(s)
         self.concat_pos += len(s)
         if self.comptype != "tar":
@@ -602,7 +576,7 @@ class _Stream:
 
         if self.mode == "w" and self.buf:
             chunk = self.buf
-            self.buf = ""
+            self.buf = b""
             if self.comptype == "gz":
                 # The native zlib crc is an unsigned 32-bit integer, but
                 # the Python wrapper implicitly casts that to a signed C
@@ -610,8 +584,8 @@ class _Stream:
                 # while the same crc on a 64-bit box may "look positive".
                 # To avoid irksome warnings from the `struct` module, force
                 # it to look positive on all boxes.
-                chunk += struct.pack("<L", self.crc & 0xffffffffL)
-                chunk += struct.pack("<L", self.concat_pos & 0xffffFFFFL)
+                chunk += struct.pack("<L", self.crc & 0xffffffff)
+                chunk += struct.pack("<L", self.concat_pos & 0xffffFFFF)
             self.__enc_write(chunk)
 
         if close_fileobj and not self._extfileobj:
@@ -623,7 +597,7 @@ class _Stream:
         if not close_fileobj and self.mode == "r" and self.comptype == "gz":
             read_crc = self.__read(4)
             read_length = self.__read(4)
-            calculated_crc = self.crc & 0xffffffffL
+            calculated_crc = self.crc
             if struct.unpack("<L", read_crc)[0] != calculated_crc:
                 raise CompressionError("bad gzip crc")
 
@@ -636,11 +610,11 @@ class _Stream:
 
         # taken from gzip.GzipFile with some alterations
         read2 = self.__read(2)
-        if read2 != "\037\213":
+        if read2 != b"\037\213":
             raise ReadError("not a gzip file")
 
         read1 = self.__read(1)
-        if read1 != "\010":
+        if read1 != b"\010":
             raise CompressionError("unsupported compression method")
 
         self.flags = flag = ord(self.__read(1))
@@ -673,7 +647,7 @@ class _Stream:
         """
         if pos - self.pos >= 0:
             blocks, remainder = divmod(pos - self.pos, self.bufsize)
-            for i in xrange(blocks):
+            for i in range(blocks):
                 self.read(self.bufsize)
             self.read(remainder)
         else:
@@ -702,8 +676,8 @@ class _Stream:
         """Reads just one line, new line character included
         """
         # if \n in dbuf, no read neads to be done
-        if '\n' in self.dbuf:
-            pos = self.dbuf.index('\n') + 1
+        if b'\n' in self.dbuf:
+            pos = self.dbuf.index(b'\n') + 1
             ret = self.dbuf[:pos]
             self.dbuf = self.dbuf[pos:]
             return ret
@@ -714,14 +688,14 @@ class _Stream:
 
             # nothing more to read, so return the buffer
             if not chunk:
-                return ''.join(buf)
+                return b''.join(buf)
 
             buf.append(chunk)
 
             # if \n found, return the new line
-            if '\n' in chunk:
-                dbuf = ''.join(buf)
-                pos = dbuf.index('\n') + 1
+            if b'\n' in chunk:
+                dbuf = b''.join(buf)
+                pos = dbuf.index(b'\n') + 1
                 self.dbuf = dbuf[pos:] + self.dbuf
                 return dbuf[:pos]
 
@@ -740,14 +714,17 @@ class _Stream:
                     buf = self.cmp.decompress(buf)
                 except IOError:
                     raise ReadError("invalid compressed data")
-                except Exception, e:
+                except Exception as e:
                     # happens at the end of the file
                     # _init_read_gz failed in the previous iteration so
                     # sel.cmp.descompress fails here
-                    pass
+                    if self.concat_stream:
+                        pass
+                    else:
+                        raise ReadError("invalid compressed data")
 
                 if self.comptype == "gz" and hasattr(self, "crc"):
-                    self.crc = self.zlib.crc32(buf, self.crc) & 0xffffffffL
+                    self.crc = self.zlib.crc32(buf, self.crc)
                 if self.concat_stream and len(self.cmp.unused_data) != 0:
                     self.buf = self.cmp.unused_data + self.buf
                     self.close(close_fileobj=False)
@@ -756,11 +733,11 @@ class _Stream:
                     except:
                         # happens at the end of the file
                         pass
-                    self.crc = self.zlib.crc32("") & 0xffffffffL
+                    self.crc = self.zlib.crc32(b"")
                     self.closed = False
             t.append(buf)
             c += len(buf)
-        t = "".join(t)
+        t = b"".join(t)
         self.dbuf = t[size:]
         return t[:size]
 
@@ -777,7 +754,7 @@ class _Stream:
             t.append(buf)
             c += len(buf)
             self.internal_pos += len(buf)
-        t = "".join(t)
+        t = b"".join(t)
         self.buf = t[size:]
 
         return t[:size]
@@ -815,15 +792,15 @@ class _Stream:
         chars because the file is decrypted in multiples of the key size.
         '''
         if self.enctype == 'aes':
-            kl = self.key_length/8
+            kl = int(self.key_length/8)
             buf = self.fileobj.read(size - kl)
             last = len(buf) < (size - kl)
             buf = self.aes_buf + buf
-            self.aes_buf = ""
+            self.aes_buf = b""
 
             # prevent setting last to False when it shouldn't
             if not last:
-                last = buf[-kl:].startswith('Salted__')
+                last = buf[-kl:].startswith(b'Salted__')
                 self.aes_buf = buf[-kl:]
                 buf = buf[:-kl]
 
@@ -837,7 +814,7 @@ class _Stream:
             return buf
 
         try:
-            idx = buf.index('Salted__')
+            idx = buf.index(b'Salted__')
         except ValueError:
             buf = self.encryption.decrypt(buf, last)
         else:
@@ -846,7 +823,7 @@ class _Stream:
             if b1:
                 buf = self.encryption.decrypt(b1, True)
             else:
-                buf = ''
+                buf = b''
 
             self.encryption.get_salt_str(b2)
             self.encryption.init()
@@ -869,78 +846,19 @@ class _StreamProxy(object):
         return self.buf
 
     def getcomptype(self):
-        if self.buf.startswith("\037\213\010"):
+        if self.buf.startswith(b"\x1f\x8b\x08"):
             return "gz"
-        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
+        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
             return "bz2"
-        return "tar"
+        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
+            return "xz"
+        else:
+            return "tar"
 
     def close(self):
         self.fileobj.close()
 # class StreamProxy
 
-class _BZ2Proxy(object):
-    """Small proxy class that enables external file object
-       support for "r:bz2" and "w:bz2" modes. This is actually
-       a workaround for a limitation in bz2 module's BZ2File
-       class which (unlike gzip.GzipFile) has no support for
-       a file object argument.
-    """
-
-    blocksize = 16 * 1024
-
-    def __init__(self, fileobj, mode):
-        self.fileobj = fileobj
-        self.mode = mode
-        self.name = getattr(self.fileobj, "name", None)
-        self.init()
-
-    def init(self):
-        import bz2
-        self.pos = 0
-        if self.mode == "r":
-            self.bz2obj = bz2.BZ2Decompressor()
-            self.fileobj.seek(0)
-            self.buf = ""
-        else:
-            self.bz2obj = bz2.BZ2Compressor()
-
-    def read(self, size):
-        b = [self.buf]
-        x = len(self.buf)
-        while x < size:
-            raw = self.fileobj.read(self.blocksize)
-            if not raw:
-                break
-            data = self.bz2obj.decompress(raw)
-            b.append(data)
-            x += len(data)
-        self.buf = "".join(b)
-
-        buf = self.buf[:size]
-        self.buf = self.buf[size:]
-        self.pos += len(buf)
-        return buf
-
-    def seek(self, pos):
-        if pos < self.pos:
-            self.init()
-        self.read(pos - self.pos)
-
-    def tell(self):
-        return self.pos
-
-    def write(self, data):
-        self.pos += len(data)
-        raw = self.bz2obj.compress(data)
-        self.fileobj.write(raw)
-
-    def close(self):
-        if self.mode == "w":
-            raw = self.bz2obj.flush()
-            self.fileobj.write(raw)
-# class _BZ2Proxy
-
 #------------------------
 # Extraction file object
 #------------------------
@@ -950,22 +868,63 @@ class _FileInFile(object):
        object.
     """
 
-    def __init__(self, fileobj, offset, size, sparse=None):
+    def __init__(self, fileobj, offset, size, blockinfo=None):
         self.fileobj = fileobj
         self.offset = offset
         self.size = size
-        self.sparse = sparse
         self.position = 0
+        self.name = getattr(fileobj, "name", None)
+        self.closed = False
+
+        if blockinfo is None:
+            blockinfo = [(0, size)]
+
+        # Construct a map with data and zero blocks.
+        self.map_index = 0
+        self.map = []
+        lastpos = 0
+        realpos = self.offset
+        for offset, size in blockinfo:
+            if offset > lastpos:
+                self.map.append((False, lastpos, offset, None))
+            self.map.append((True, offset, offset + size, realpos))
+            realpos += size
+            lastpos = offset + size
+        if lastpos < self.size:
+            self.map.append((False, lastpos, self.size, None))
+
+    def flush(self):
+        pass
+
+    def readable(self):
+        return True
+
+    def writable(self):
+        return False
+
+    def seekable(self):
+        return self.fileobj.seekable()
 
     def tell(self):
         """Return the current file position.
         """
         return self.position
 
-    def seek(self, position):
+    def seek(self, position, whence=io.SEEK_SET):
         """Seek to a position in the file.
         """
-        self.position = position
+        if whence == io.SEEK_SET:
+            self.position = min(max(position, 0), self.size)
+        elif whence == io.SEEK_CUR:
+            if position < 0:
+                self.position = max(self.position + position, 0)
+            else:
+                self.position = min(self.position + position, self.size)
+        elif whence == io.SEEK_END:
+            self.position = max(min(self.size + position, self.size), 0)
+        else:
+            raise ValueError("Invalid argument")
+        return self.position
 
     def read(self, size=None):
         """Read data from the file.
@@ -975,177 +934,42 @@ class _FileInFile(object):
         else:
             size = min(size, self.size - self.position)
 
-        if self.sparse is None:
-            return self.readnormal(size)
-        else:
-            return self.readsparse(size)
-
-    def readnormal(self, size):
-        """Read operation for regular files.
-        """
-        self.fileobj.seek(self.offset + self.position)
-        self.position += size
-        return self.fileobj.read(size)
-
-    def readsparse(self, size):
-        """Read operation for sparse files.
-        """
-        data = []
+        buf = b""
         while size > 0:
-            buf = self.readsparsesection(size)
-            if not buf:
-                break
-            size -= len(buf)
-            data.append(buf)
-        return "".join(data)
-
-    def readsparsesection(self, size):
-        """Read a single section of a sparse file.
-        """
-        section = self.sparse.find(self.position)
-
-        if section is None:
-            return ""
-
-        size = min(size, section.offset + section.size - self.position)
-
-        if isinstance(section, _data):
-            realpos = section.realpos + self.position - section.offset
-            self.fileobj.seek(self.offset + realpos)
-            self.position += size
-            return self.fileobj.read(size)
-        else:
-            self.position += size
-            return NUL * size
-#class _FileInFile
-
-
-class ExFileObject(object):
-    """File-like object for reading an archive member.
-       Is returned by TarFile.extractfile().
-    """
-    blocksize = 1024
-
-    def __init__(self, tarfile, tarinfo):
-        self.fileobj = _FileInFile(tarfile.fileobj,
-                                   tarinfo.offset_data,
-                                   tarinfo.size,
-                                   getattr(tarinfo, "sparse", None))
-        self.name = tarinfo.name
-        self.mode = "r"
-        self.closed = False
-        self.size = tarinfo.size
-
-        self.position = 0
-        self.buffer = ""
-
-    def read(self, size=None):
-        """Read at most size bytes from the file. If size is not
-           present or None, read all data until EOF is reached.
-        """
-        if self.closed:
-            raise ValueError("I/O operation on closed file")
-
-        buf = ""
-        if self.buffer:
-            if size is None:
-                buf = self.buffer
-                self.buffer = ""
-            else:
-                buf = self.buffer[:size]
-                self.buffer = self.buffer[size:]
-
-        if size is None:
-            buf += self.fileobj.read()
-        else:
-            buf += self.fileobj.read(size - len(buf))
-
-        self.position += len(buf)
-        return buf
-
-    def readline(self, size=-1):
-        """Read one entire line from the file. If size is present
-           and non-negative, return a string with at most that
-           size, which may be an incomplete line.
-        """
-        if self.closed:
-            raise ValueError("I/O operation on closed file")
-
-        if "\n" in self.buffer:
-            pos = self.buffer.find("\n") + 1
-        else:
-            buffers = [self.buffer]
             while True:
-                buf = self.fileobj.read(self.blocksize)
-                buffers.append(buf)
-                if not buf or "\n" in buf:
-                    self.buffer = "".join(buffers)
-                    pos = self.buffer.find("\n") + 1
-                    if pos == 0:
-                        # no newline found.
-                        pos = len(self.buffer)
+                data, start, stop, offset = self.map[self.map_index]
+                if start <= self.position < stop:
                     break
-
-        if size != -1:
-            pos = min(size, pos)
-
-        buf = self.buffer[:pos]
-        self.buffer = self.buffer[pos:]
-        self.position += len(buf)
-        return buf
-
-    def readlines(self):
-        """Return a list with all remaining lines.
-        """
-        result = []
-        while True:
-            line = self.readline()
-            if not line: break
-            result.append(line)
-        return result
-
-    def tell(self):
-        """Return the current file position.
-        """
-        if self.closed:
-            raise ValueError("I/O operation on closed file")
-
-        return self.position
-
-    def seek(self, pos, whence=os.SEEK_SET):
-        """Seek to a position in the file.
-        """
-        if self.closed:
-            raise ValueError("I/O operation on closed file")
-
-        if whence == os.SEEK_SET:
-            self.position = min(max(pos, 0), self.size)
-        elif whence == os.SEEK_CUR:
-            if pos < 0:
-                self.position = max(self.position + pos, 0)
+                else:
+                    self.map_index += 1
+                    if self.map_index == len(self.map):
+                        self.map_index = 0
+            length = min(size, stop - self.position)
+            if data:
+                self.fileobj.seek(offset + (self.position - start))
+                buf += self.fileobj.read(length)
             else:
-                self.position = min(self.position + pos, self.size)
-        elif whence == os.SEEK_END:
-            self.position = max(min(self.size + pos, self.size), 0)
-        else:
-            raise ValueError("Invalid argument")
+                buf += NUL * length
+            size -= length
+            self.position += length
+        return buf
 
-        self.buffer = ""
-        self.fileobj.seek(self.position)
+    def readinto(self, b):
+        buf = self.read(len(b))
+        b[:len(buf)] = buf
+        return len(buf)
 
     def close(self):
-        """Close the file object.
-        """
         self.closed = True
+#class _FileInFile
 
-    def __iter__(self):
-        """Get an iterator over the file's lines.
-        """
-        while True:
-            line = self.readline()
-            if not line:
-                break
-            yield line
+
+class ExFileObject(io.BufferedReader):
+
+    def __init__(self, tarfile, tarinfo):
+        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
+                tarinfo.size, tarinfo.sparse)
+        super().__init__(fileobj)
 #class ExFileObject
 
 #------------------
@@ -1159,12 +983,18 @@ class TarInfo(object):
        usually created internally.
     """
 
+    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
+                 "chksum", "type", "linkname", "uname", "gname",
+                 "devmajor", "devminor", "volume_offset",
+                 "offset", "offset_data", "pax_headers", "sparse",
+                 "tarfile", "_sparse_structs", "_link_target")
+
     def __init__(self, name=""):
         """Construct a TarInfo object. name is the optional name
            of the member.
         """
         self.name = name        # member name
-        self.mode = 0644        # file permissions
+        self.mode = 0o644       # file permissions
         self.uid = 0            # user id
         self.gid = 0            # group id
         self.size = 0           # file size
@@ -1182,6 +1012,7 @@ class TarInfo(object):
         self.volume_offset = 0  # the file's data corresponds with the data
                                 # starting at this position
 
+        self.sparse = None      # sparse member information
         self.pax_headers = {}   # pax header information
 
     # In pax headers the "name" and "linkname" field are called
@@ -1201,12 +1032,12 @@ class TarInfo(object):
     def __repr__(self):
         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
 
-    def get_info(self, encoding, errors):
+    def get_info(self, encoding=None, errors=None):
         """Return the TarInfo's attributes as a dictionary.
         """
         info = {
             "name":     self.name,
-            "mode":     self.mode & 07777,
+            "mode":     self.mode & 0o7777,
             "uid":      self.uid,
             "gid":      self.gid,
             "size":     self.size,
@@ -1225,27 +1056,24 @@ class TarInfo(object):
         if info["type"] == DIRTYPE and not info["name"].endswith("/"):
             info["name"] += "/"
 
-        for key in ("name", "linkname", "uname", "gname"):
-            if type(info[key]) is unicode:
-                info[key] = info[key].encode(encoding, errors)
-
         return info
 
-    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
+    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING,
+              errors="surrogateescape"):
         """Return a tar header as a string of 512 byte blocks.
         """
         info = self.get_info(encoding, errors)
 
         if format == USTAR_FORMAT:
-            return self.create_ustar_header(info)
+            return self.create_ustar_header(info, encoding, errors)
         elif format == GNU_FORMAT:
-            return self.create_gnu_header(info)
+            return self.create_gnu_header(info, encoding, errors)
         elif format == PAX_FORMAT:
             return self.create_pax_header(info, encoding, errors)
         else:
             raise ValueError("invalid format")
 
-    def create_ustar_header(self, info):
+    def create_ustar_header(self, info, encoding, errors):
         """Return the object as a ustar header block.
         """
         info["magic"] = POSIX_MAGIC
@@ -1256,9 +1084,9 @@ class TarInfo(object):
         if len(info["name"]) > LENGTH_NAME:
             info["prefix"], info["name"] = self._posix_split_name(info["name"])
 
-        return self._create_header(info, USTAR_FORMAT)
+        return self._create_header(info, USTAR_FORMAT, encoding, errors)
 
-    def create_gnu_header(self, info):
+    def create_gnu_header(self, info, encoding, errors):
         """Return the object as a GNU header block sequence.
         """
         info["magic"] = GNU_MAGIC
@@ -1270,17 +1098,19 @@ class TarInfo(object):
                 itn(self.volume_offset, 12, GNU_FORMAT),
                 itn(0, 119, GNU_FORMAT), # stuff unused in this tar implementation, set to zero
             ]
-            info['prefix'] = "".join(prefix)
+            info['prefix'] = b"".join(prefix)
             info['size'] = info['size'] - self.volume_offset
 
-        buf = ""
+        buf = b""
         if len(info["linkname"]) > LENGTH_LINK:
-            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
+            buf += self._create_gnu_long_header(info["linkname"],
+                GNUTYPE_LONGLINK, encoding, errors)
 
         if len(info["name"]) > LENGTH_NAME:
-            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
+            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME,
+                                                encoding, errors)
 
-        return buf + self._create_header(info, GNU_FORMAT)
+        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
 
     def create_pax_header(self, info, encoding, errors):
         """Return the object as a ustar header block. If it cannot be
@@ -1304,17 +1134,15 @@ class TarInfo(object):
                 # The pax header has priority.
                 continue
 
-            val = info[name].decode(encoding, errors)
-
             # Try to encode the string as ASCII.
             try:
-                val.encode("ascii")
+                info[name].encode("ascii", "strict")
             except UnicodeEncodeError:
-                pax_headers[hname] = val
+                pax_headers[hname] = info[name]
                 continue
 
             if len(info[name]) > length:
-                pax_headers[hname] = val
+                pax_headers[hname] = info[name]
 
         # Test number fields for values that exceed the field limit or values
         # that like to be stored as float.
@@ -1326,22 +1154,22 @@ class TarInfo(object):
 
             val = info[name]
             if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
-                pax_headers[name] = unicode(val)
+                pax_headers[name] = str(val)
                 info[name] = 0
 
         # Create a pax extended header if necessary.
         if pax_headers:
-            buf = self._create_pax_generic_header(pax_headers)
+            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
         else:
-            buf = ""
+            buf = b""
 
-        return buf + self._create_header(info, USTAR_FORMAT)
+        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
 
     @classmethod
     def create_pax_global_header(cls, pax_headers):
         """Return the object as a pax global header block sequence.
         """
-        return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
+        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
 
     def _posix_split_name(self, name):
         """Split a name longer than 100 chars into a prefix
@@ -1359,31 +1187,31 @@ class TarInfo(object):
         return prefix, name
 
     @staticmethod
-    def _create_header(info, format):
+    def _create_header(info, format, encoding, errors):
         """Return a header block. info is a dictionary with file
            information, format must be one of the *_FORMAT constants.
         """
         parts = [
-            stn(info.get("name", ""), 100),
-            itn(info.get("mode", 0) & 07777, 8, format),
+            stn(info.get("name", ""), 100, encoding, errors),
+            itn(info.get("mode", 0) & 0o7777, 8, format),
             itn(info.get("uid", 0), 8, format),
             itn(info.get("gid", 0), 8, format),
             itn(info.get("size", 0), 12, format),
             itn(info.get("mtime", 0), 12, format),
-            "        ", # checksum field
+            b"        ", # checksum field
             info.get("type", REGTYPE),
-            stn(info.get("linkname", ""), 100),
-            stn(info.get("magic", POSIX_MAGIC), 8),
-            stn(info.get("uname", ""), 32),
-            stn(info.get("gname", ""), 32),
+            stn(info.get("linkname", ""), 100, encoding, errors),
+            info.get("magic", POSIX_MAGIC),
+            stn(info.get("uname", ""), 32, encoding, errors),
+            stn(info.get("gname", ""), 32, encoding, errors),
             itn(info.get("devmajor", 0), 8, format),
             itn(info.get("devminor", 0), 8, format),
-            stn(info.get("prefix", ""), 155)
+            sbtn(info.get("prefix", ""), 155, encoding, errors)
         ]
 
-        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
+        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
         chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
-        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
+        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
         return buf
 
     @staticmethod
@@ -1397,11 +1225,11 @@ class TarInfo(object):
         return payload
 
     @classmethod
-    def _create_gnu_long_header(cls, name, type):
+    def _create_gnu_long_header(cls, name, type, encoding, errors):
         """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
            for name.
         """
-        name += NUL
+        name = name.encode(encoding, errors) + NUL
 
         info = {}
         info["name"] = "././@LongLink"
@@ -1410,19 +1238,39 @@ class TarInfo(object):
         info["magic"] = GNU_MAGIC
 
         # create extended header + name blocks.
-        return cls._create_header(info, USTAR_FORMAT) + \
+        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                 cls._create_payload(name)
 
     @classmethod
-    def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
-        """Return a POSIX.1-2001 extended or global header sequence
+    def _create_pax_generic_header(cls, pax_headers, type, encoding):
+        """Return a POSIX.1-2008 extended or global header sequence
            that contains a list of keyword, value pairs. The values
-           must be unicode objects.
+           must be strings.
         """
-        records = []
-        for keyword, value in pax_headers.iteritems():
-            keyword = keyword.encode("utf8")
-            value = value.encode("utf8")
+        # Check if one of the fields contains surrogate characters and thereby
+        # forces hdrcharset=BINARY, see _proc_pax() for more information.
+        binary = False
+        for keyword, value in pax_headers.items():
+            try:
+                value.encode("utf-8", "strict")
+            except UnicodeEncodeError:
+                binary = True
+                break
+
+        records = b""
+        if binary:
+            # Put the hdrcharset field at the beginning of the header.
+            records += b"21 hdrcharset=BINARY\n"
+
+        for keyword, value in pax_headers.items():
+            keyword = keyword.encode("utf-8")
+            if binary:
+                # Try to restore the original byte representation of `value'.
+                # Needless to say, that the encoding must match the string.
+                value = value.encode(encoding, "surrogateescape")
+            else:
+                value = value.encode("utf-8")
+
             l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
             n = p = 0
             while True:
@@ -1430,8 +1278,7 @@ class TarInfo(object):
                 if n == p:
                     break
                 p = n
-            records.append("%d %s=%s\n" % (p, keyword, value))
-        records = "".join(records)
+            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
 
         # We use a hardcoded "././@PaxHeader" name like star does
         # instead of the one that POSIX recommends.
@@ -1442,12 +1289,12 @@ class TarInfo(object):
         info["magic"] = POSIX_MAGIC
 
         # Create pax header + record blocks.
-        return cls._create_header(info, USTAR_FORMAT) + \
+        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                 cls._create_payload(records)
 
     @classmethod
-    def frombuf(cls, buf):
-        """Construct a TarInfo object from a 512 byte string buffer.
+    def frombuf(cls, buf, encoding, errors):
+        """Construct a TarInfo object from a 512 byte bytes object.
         """
         if len(buf) == 0:
             raise EmptyHeaderError("empty header")
@@ -1461,8 +1308,7 @@ class TarInfo(object):
             raise InvalidHeaderError("bad checksum")
 
         obj = cls()
-        obj.buf = buf
-        obj.name = nts(buf[0:100])
+        obj.name = nts(buf[0:100], encoding, errors)
         obj.mode = nti(buf[100:108])
         obj.uid = nti(buf[108:116])
         obj.gid = nti(buf[116:124])
@@ -1470,12 +1316,30 @@ class TarInfo(object):
         obj.mtime = nti(buf[136:148])
         obj.chksum = chksum
         obj.type = buf[156:157]
-        obj.linkname = nts(buf[157:257])
-        obj.uname = nts(buf[265:297])
-        obj.gname = nts(buf[297:329])
+        obj.linkname = nts(buf[157:257], encoding, errors)
+        obj.uname = nts(buf[265:297], encoding, errors)
+        obj.gname = nts(buf[297:329], encoding, errors)
         obj.devmajor = nti(buf[329:337])
         obj.devminor = nti(buf[337:345])
-        prefix = nts(buf[345:500])
+        prefix = nts(buf[345:500], encoding, errors)
+
+        # The old GNU sparse format occupies some of the unused
+        # space in the buffer for up to 4 sparse structures.
+        # Save them for later processing in _proc_sparse().
+        if obj.type == GNUTYPE_SPARSE:
+            pos = 386
+            structs = []
+            for i in range(4):
+                try:
+                    offset = nti(buf[pos:pos + 12])
+                    numbytes = nti(buf[pos + 12:pos + 24])
+                except ValueError:
+                    break
+                structs.append((offset, numbytes))
+                pos += 24
+            isextended = bool(buf[482])
+            origsize = nti(buf[483:495])
+            obj._sparse_structs = (structs, isextended, origsize)
 
         # Old V7 tar format represents a directory as a regular
         # file with a trailing slash.
@@ -1499,7 +1363,7 @@ class TarInfo(object):
            tarfile.
         """
         buf = tarfile.fileobj.read(BLOCKSIZE)
-        obj = cls.frombuf(buf)
+        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
         obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
         return obj._proc_member(tarfile)
 
@@ -1560,71 +1424,43 @@ class TarInfo(object):
         # the longname information.
         next.offset = self.offset
         if self.type == GNUTYPE_LONGNAME:
-            next.name = nts(buf)
+            next.name = nts(buf, tarfile.encoding, tarfile.errors)
         elif self.type == GNUTYPE_LONGLINK:
-            next.linkname = nts(buf)
+            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
 
         return next
 
     def _proc_sparse(self, tarfile):
         """Process a GNU sparse header plus extra headers.
         """
-        buf = self.buf
-        sp = _ringbuffer()
-        pos = 386
-        lastpos = 0L
-        realpos = 0L
-        # There are 4 possible sparse structs in the
-        # first header.
-        for i in xrange(4):
-            try:
-                offset = nti(buf[pos:pos + 12])
-                numbytes = nti(buf[pos + 12:pos + 24])
-            except ValueError:
-                break
-            if offset > lastpos:
-                sp.append(_hole(lastpos, offset - lastpos))
-            sp.append(_data(offset, numbytes, realpos))
-            realpos += numbytes
-            lastpos = offset + numbytes
-            pos += 24
-
-        isextended = ord(buf[482])
-        origsize = nti(buf[483:495])
-
-        # If the isextended flag is given,
-        # there are extra headers to process.
-        while isextended == 1:
+        # We already collected some sparse structures in frombuf().
+        structs, isextended, origsize = self._sparse_structs
+        del self._sparse_structs
+
+        # Collect sparse structures from extended header blocks.
+        while isextended:
             buf = tarfile.fileobj.read(BLOCKSIZE)
             pos = 0
-            for i in xrange(21):
+            for i in range(21):
                 try:
                     offset = nti(buf[pos:pos + 12])
                     numbytes = nti(buf[pos + 12:pos + 24])
                 except ValueError:
                     break
-                if offset > lastpos:
-                    sp.append(_hole(lastpos, offset - lastpos))
-                sp.append(_data(offset, numbytes, realpos))
-                realpos += numbytes
-                lastpos = offset + numbytes
+                if offset and numbytes:
+                    structs.append((offset, numbytes))
                 pos += 24
-            isextended = ord(buf[504])
-
-        if lastpos < origsize:
-            sp.append(_hole(lastpos, origsize - lastpos))
-
-        self.sparse = sp
+            isextended = bool(buf[504])
+        self.sparse = structs
 
         self.offset_data = tarfile.fileobj.tell()
         tarfile.offset = self.offset_data + self._block(self.size)
         self.size = origsize
-
         return self
 
     def _proc_pax(self, tarfile):
         """Process an extended or global header as described in
-           POSIX.1-2001.
+           POSIX.1-2008.
         """
         # Read the header information.
         buf = tarfile.fileobj.read(self._block(self.size))
@@ -1637,11 +1473,29 @@ class TarInfo(object):
         else:
             pax_headers = tarfile.pax_headers.copy()
 
+        # Check if the pax header contains a hdrcharset field. This tells us
+        # the encoding of the path, linkpath, uname and gname fields. Normally,
+        # these fields are UTF-8 encoded but, since POSIX.1-2008, tar
+        # implementations are allowed to store them as raw binary strings if
+        # the translation to UTF-8 fails.
+        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
+        if match is not None:
+            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
+
+        # For the time being, we don't care about anything other than "BINARY".
+        # The only other value that is currently allowed by the standard is
+        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
+        hdrcharset = pax_headers.get("hdrcharset")
+        if hdrcharset == "BINARY":
+            encoding = tarfile.encoding
+        else:
+            encoding = "utf-8"
+
         # Parse pax header information. A record looks like that:
         # "%d %s=%s\n" % (length, keyword, value). length is the size
         # of the complete record including the length field itself and
         # the newline. keyword and value are both UTF-8 encoded strings.
-        regex = re.compile(r"(\d+) ([^=]+)=", re.U)
+        regex = re.compile(br"(\d+) ([^=]+)=")
         pos = 0
         while True:
             match = regex.match(buf, pos)
@@ -1652,8 +1506,21 @@ class TarInfo(object):
             length = int(length)
             value = buf[match.end(2) + 1:match.start(1) + length - 1]
 
-            keyword = keyword.decode("utf8")
-            value = value.decode("utf8")
+            # Normally, we could just use "utf-8" as the encoding and "strict"
+            # as the error handler, but we better not take the risk. For
+            # example, GNU tar <= 1.23 is known to store filenames it cannot
+            # translate to UTF-8 as raw strings (unfortunately without a
+            # hdrcharset=BINARY header).
+            # We first try the strict standard encoding, and if that fails we
+            # fall back on the user's encoding and error handler.
+            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
+                    tarfile.errors)
+            if keyword in PAX_NAME_FIELDS:
+                value = self._decode_pax_field(value, encoding, tarfile.encoding,
+                        tarfile.errors)
+            else:
+                value = self._decode_pax_field(value, "utf-8", "utf-8",
+                        tarfile.errors)
 
             pax_headers[keyword] = value
             pos += length
@@ -1665,6 +1532,19 @@ class TarInfo(object):
         except HeaderError:
             raise SubsequentHeaderError("missing or bad subsequent header")
 
+        # Process GNU sparse information.
+        if "GNU.sparse.map" in pax_headers:
+            # GNU extended sparse format version 0.1.
+            self._proc_gnusparse_01(next, pax_headers)
+
+        elif "GNU.sparse.size" in pax_headers:
+            # GNU extended sparse format version 0.0.
+            self._proc_gnusparse_00(next, pax_headers, buf)
+
+        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
+            # GNU extended sparse format version 1.0.
+            self._proc_gnusparse_10(next, pax_headers, tarfile)
+
         if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
             # Patch the TarInfo object with the extended header info.
             next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
@@ -1693,29 +1573,70 @@ class TarInfo(object):
 
         return next
 
-    def _apply_pax_info(self, pax_headers, encoding, errors):
-        """Replace fields with supplemental information from a previous
-           pax extended or global header.
+    def _proc_gnusparse_00(self, next, pax_headers, buf):
+        """Process a GNU tar extended sparse header, version 0.0.
         """
-        for keyword, value in pax_headers.iteritems():
-            if keyword not in PAX_FIELDS:
-                continue
+        offsets = []
+        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
+            offsets.append(int(match.group(1)))
+        numbytes = []
+        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
+            numbytes.append(int(match.group(1)))
+        next.sparse = list(zip(offsets, numbytes))
 
-            if keyword == "path":
-                value = value.rstrip("/")
+    def _proc_gnusparse_01(self, next, pax_headers):
+        """Process a GNU tar extended sparse header, version 0.1.
+        """
+        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
+        next.sparse = list(zip(sparse[::2], sparse[1::2]))
 
-            if keyword in PAX_NUMBER_FIELDS:
-                try:
-                    value = PAX_NUMBER_FIELDS[keyword](value)
-                except ValueError:
-                    value = 0
-            else:
-                value = uts(value, encoding, errors)
+    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
+        """Process a GNU tar extended sparse header, version 1.0.
+        """
+        fields = None
+        sparse = []
+        buf = tarfile.fileobj.read(BLOCKSIZE)
+        fields, buf = buf.split(b"\n", 1)
+        fields = int(fields)
+        while len(sparse) < fields * 2:
+            if b"\n" not in buf:
+                buf += tarfile.fileobj.read(BLOCKSIZE)
+            number, buf = buf.split(b"\n", 1)
+            sparse.append(int(number))
+        next.offset_data = tarfile.fileobj.tell()
+        next.sparse = list(zip(sparse[::2], sparse[1::2]))
 
-            setattr(self, keyword, value)
+    def _apply_pax_info(self, pax_headers, encoding, errors):
+        """Replace fields with supplemental information from a previous
+           pax extended or global header.
+        """
+        for keyword, value in pax_headers.items():
+            if keyword == "GNU.sparse.name":
+                setattr(self, "path", value)
+            elif keyword == "GNU.sparse.size":
+                setattr(self, "size", int(value))
+            elif keyword == "GNU.sparse.realsize":
+                setattr(self, "size", int(value))
+            elif keyword in PAX_FIELDS:
+                if keyword in PAX_NUMBER_FIELDS:
+                    try:
+                        value = PAX_NUMBER_FIELDS[keyword](value)
+                    except ValueError:
+                        value = 0
+                if keyword == "path":
+                    value = value.rstrip("/")
+                setattr(self, keyword, value)
 
         self.pax_headers = pax_headers.copy()
 
+    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
+        """Decode a single field from a pax record.
+        """
+        try:
+            return value.decode(encoding, "strict")
+        except UnicodeDecodeError:
+            return value.decode(fallback_encoding, fallback_errors)
+
     def _block(self, count):
         """Round up a byte count by BLOCKSIZE and return it,
            e.g. _block(834) => 1024.
@@ -1742,7 +1663,7 @@ class TarInfo(object):
     def isfifo(self):
         return self.type == FIFOTYPE
     def issparse(self):
-        return self.type == GNUTYPE_SPARSE
+        return self.sparse is not None
     def isdev(self):
         return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
     def ismultivol(self):
@@ -1783,7 +1704,7 @@ class TarFile(object):
 
     tarinfo = TarInfo           # The default TarInfo class to use.
 
-    fileobject = ExFileObject   # The default ExFileObject class to use.
+    fileobject = ExFileObject   # The file-object for extractfile().
 
     concat_compression = False  # Used to separate in different zip members each
                                 # file, used for robustness.
@@ -1796,8 +1717,8 @@ class TarFile(object):
 
     def __init__(self, name=None, mode="r", fileobj=None, format=None,
             tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
-            errors=None, pax_headers=None, debug=None, errorlevel=None,
-            max_volume_size=None, new_volume_handler=None,
+            errors="surrogateescape", pax_headers=None, debug=None,
+             errorlevel=None, max_volume_size=None, new_volume_handler=None,
             concat_compression=False, password='', save_to_members=True):
         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
            read from an existing archive, 'a' to append data to an existing
@@ -1825,9 +1746,10 @@ class TarFile(object):
             if name is None and hasattr(fileobj, "name"):
                 name = fileobj.name
             # when fileobj is a gzip.GzipFile, fileobj.mode is an int (not valid for us)
-            if hasattr(fileobj, "mode") and isinstance(fileobj.mode, basestring):
+            if hasattr(fileobj, "mode") and isinstance(fileobj.mode, str):
                 self._mode = fileobj.mode
             self._extfileobj = True
+        self.name = os.path.abspath(name) if name else None
         self.base_name = self.name = os.path.abspath(name) if name else None
         self.fileobj = fileobj
 
@@ -1843,12 +1765,7 @@ class TarFile(object):
         if encoding is not None:
             self.encoding = encoding
 
-        if errors is not None:
-            self.errors = errors
-        elif mode == "r":
-            self.errors = "utf-8"
-        else:
-            self.errors = "strict"
+        self.errors = errors
 
         if pax_headers is not None and self.format == PAX_FORMAT:
             self.pax_headers = pax_headers
@@ -1893,7 +1810,7 @@ class TarFile(object):
                     except EOFHeaderError:
                         self.fileobj.seek(self.offset)
                         break
-                    except HeaderError, e:
+                    except HeaderError as e:
                         raise ReadError(str(e))
 
             if self.mode in "aw":
@@ -1909,18 +1826,6 @@ class TarFile(object):
             self.closed = True
             raise
 
-    def _getposix(self):
-        return self.format == USTAR_FORMAT
-    def _setposix(self, value):
-        import warnings
-        warnings.warn("use the format attribute instead", DeprecationWarning,
-                      2)
-        if value:
-            self.format = USTAR_FORMAT
-        else:
-            self.format = GNU_FORMAT
-    posix = property(_getposix, _setposix)
-
     #--------------------------------------------------------------------------
     # Below are the classmethods which act as alternate constructors to the
     # TarFile class. The open() method is the only one that is needed for
@@ -1942,18 +1847,22 @@ class TarFile(object):
            'r:'         open for reading exclusively uncompressed
            'r:gz'       open for reading with gzip compression
            'r:bz2'      open for reading with bzip2 compression
+           'r:xz'       open for reading with lzma compression
            'a' or 'a:'  open for appending, creating the file if necessary
            'w' or 'w:'  open for writing without compression
            'w:gz'       open for writing with gzip compression
            'w:bz2'      open for writing with bzip2 compression
+           'w:xz'       open for writing with lzma compression
 
            'r|*'        open a stream of tar blocks with transparent compression
            'r|'         open an uncompressed stream of tar blocks for reading
            'r|gz'       open a gzip compressed stream of tar blocks
            'r|bz2'      open a bzip2 compressed stream of tar blocks
+           'r|xz'       open an lzma compressed stream of tar blocks
            'w|'         open an uncompressed stream for writing
            'w|gz'       open a gzip compressed stream for writing
            'w|bz2'      open a bzip2 compressed stream for writing
+           'w|xz'       open an lzma compressed stream for writing
 
            'r#gz'       open a stream of gzip compressed tar blocks for reading
            'w#gz'       open a stream of gzip compressed tar blocks for writing
@@ -1978,7 +1887,7 @@ class TarFile(object):
                     saved_pos = fileobj.tell()
                 try:
                     return func(name, "r", fileobj, **kwargs)
-                except (ReadError, CompressionError):
+                except (ReadError, CompressionError) as e:
                     if fileobj is not None:
                         fileobj.seek(saved_pos)
                     continue
@@ -2048,11 +1957,14 @@ class TarFile(object):
 
             kwargs['concat_compression'] = True
 
-            t = cls(name, filemode,
-                    _Stream(name, filemode, comptype, fileobj, bufsize,
-                            concat_stream=True, enctype=enctype,
-                            password=password, key_length=key_length),
-                    **kwargs)
+            stream = _Stream(name, filemode, comptype, fileobj, bufsize,
+                             concat_stream=True, enctype=enctype,
+                             password=password, key_length=key_length)
+            try:
+                t = cls(name, filemode, stream, **kwargs)
+            except:
+                stream.close()
+                raise
             t._extfileobj = False
             return t
 
@@ -2083,16 +1995,21 @@ class TarFile(object):
         except (ImportError, AttributeError):
             raise CompressionError("gzip module is not available")
 
-        if fileobj is None:
-            fileobj = bltn_open(name, mode + "b")
-
+        extfileobj = fileobj is not None
         try:
-            t = cls.taropen(name, mode,
-                gzip.GzipFile(name, mode, compresslevel, fileobj),
-                **kwargs)
-        except IOError:
+            fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
+            t = cls.taropen(name, mode, fileobj, **kwargs)
+        except OSError:
+            if not extfileobj and fileobj is not None:
+                fileobj.close()
+            if fileobj is None:
+                raise
             raise ReadError("not a gzip file")
-        t._extfileobj = False
+        except:
+            if not extfileobj and fileobj is not None:
+                fileobj.close()
+            raise
+        t._extfileobj = extfileobj
         return t
 
     @classmethod
@@ -2108,23 +2025,46 @@ class TarFile(object):
         except ImportError:
             raise CompressionError("bz2 module is not available")
 
-        if fileobj is not None:
-            fileobj = _BZ2Proxy(fileobj, mode)
-        else:
-            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
+        fileobj = bz2.BZ2File(fileobj or name, mode,
+                              compresslevel=compresslevel)
 
         try:
             t = cls.taropen(name, mode, fileobj, **kwargs)
-        except (IOError, EOFError):
+        except (OSError, EOFError):
+            fileobj.close()
             raise ReadError("not a bzip2 file")
         t._extfileobj = False
         return t
 
+    @classmethod
+    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
+        """Open lzma compressed tar archive name for reading or writing.
+           Appending is not allowed.
+        """
+        if mode not in ("r", "w"):
+            raise ValueError("mode must be 'r' or 'w'")
+
+        try:
+            import lzma
+        except ImportError:
+            raise CompressionError("lzma module is not available")
+
+        fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)
+
+        try:
+            t = cls.taropen(name, mode, fileobj, **kwargs)
+        except (lzma.LZMAError, EOFError):
+            fileobj.close()
+            raise ReadError("not an lzma file")
+        t._extfileobj = False
+        return t
+
     # All *open() methods are registered here.
     OPEN_METH = {
         "tar": "taropen",   # uncompressed tar
         "gz":  "gzopen",    # gzip compressed tar
-        "bz2": "bz2open"    # bzip2 compressed tar
+        "bz2": "bz2open",   # bzip2 compressed tar
+        "xz":  "xzopen"     # lzma compressed tar
     }
 
     #--------------------------------------------------------------------------
@@ -2204,8 +2144,7 @@ class TarFile(object):
         if arcname is None:
             arcname = name
         drv, arcname = os.path.splitdrive(arcname)
-        if os.sep != "/":
-            arcname = arcname.replace(os.sep, "/")
+        arcname = arcname.replace(os.sep, "/")
         arcname = arcname.lstrip("/")
 
         # Now, fill the TarInfo object with
@@ -2262,7 +2201,7 @@ class TarFile(object):
         if type == REGTYPE:
             tarinfo.size = statres.st_size
         else:
-            tarinfo.size = 0L
+            tarinfo.size = 0
         tarinfo.mtime = statres.st_mtime
         tarinfo.type = type
         tarinfo.linkname = linkname
@@ -2292,27 +2231,27 @@ class TarFile(object):
 
         for tarinfo in self:
             if verbose:
-                print filemode(tarinfo.mode),
-                print "%s/%s" % (tarinfo.uname or tarinfo.uid,
-                                 tarinfo.gname or tarinfo.gid),
+                print(stat.filemode(tarinfo.mode), end=' ')
+                print("%s/%s" % (tarinfo.uname or tarinfo.uid,
+                                 tarinfo.gname or tarinfo.gid), end=' ')
                 if tarinfo.ischr() or tarinfo.isblk():
-                    print "%10s" % ("%d,%d" \
-                                    % (tarinfo.devmajor, tarinfo.devminor)),
+                    print("%10s" % ("%d,%d" \
+                                    % (tarinfo.devmajor, tarinfo.devminor)), end=' ')
                 else:
-                    print "%10d" % tarinfo.size,
-                print "%d-%02d-%02d %02d:%02d:%02d" \
-                      % time.localtime(tarinfo.mtime)[:6],
+                    print("%10d" % tarinfo.size, end=' ')
+                print("%d-%02d-%02d %02d:%02d:%02d" \
+                      % time.localtime(tarinfo.mtime)[:6], end=' ')
 
-            print tarinfo.name + ("/" if tarinfo.isdir() else ""),
+            print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ')
 
             if verbose:
                 if tarinfo.issym():
-                    print "->", tarinfo.linkname,
+                    print("->", tarinfo.linkname, end=' ')
                 if tarinfo.islnk():
-                    print "link to", tarinfo.linkname,
-            print
+                    print("link to", tarinfo.linkname, end=' ')
+            print()
 
-    def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
+    def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
         """Add the file `name' to the archive. `name' may be any type of file
            (directory, fifo, symbolic link, etc.). If given, `arcname'
            specifies an alternative name for the file in the archive.
@@ -2368,18 +2307,18 @@ class TarFile(object):
             if recursive:
                 for f in os.listdir(name):
                     self.add(os.path.join(name, f), os.path.join(arcname, f),
-                            recursive, exclude, filter)
+                            recursive, exclude, filter=filter)
 
         else:
             self.addfile(tarinfo)
 
     def _size_left(self):
-        '''
-        Calculates size left, assumes self.max_volume_size is set
-        '''
+        """Calculates size left in a volume with a maximum volume size.
+        Assumes self.max_volume_size is set.
+        """
         size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
         # limit size left to a discrete number of blocks, because we won't
-        # write only half a block when writing the end of a volume
+        # write only half a block when writing the end of a volume
         # and filling with zeros
         blocks, remainder = divmod(size_left, BLOCKSIZE)
         return blocks*BLOCKSIZE
@@ -2538,7 +2477,7 @@ class TarFile(object):
                     except EOFHeaderError:
                         self.fileobj.seek(self.offset)
                         break
-                    except HeaderError, e:
+                    except HeaderError as e:
                         raise ReadError(str(e))
 
             if self.mode in "aw":
@@ -2546,9 +2485,9 @@ class TarFile(object):
 
                 if  self.format == PAX_FORMAT:
                     volume_info = {
-                        "GNU.volume.filename": unicode(self.volume_tarinfo.name),
-                        "GNU.volume.size": unicode(self.volume_tarinfo.size - self.volume_tarinfo.volume_offset),
-                        "GNU.volume.offset": unicode(self.volume_tarinfo.volume_offset),
+                        "GNU.volume.filename": str(self.volume_tarinfo.name),
+                        "GNU.volume.size": str(self.volume_tarinfo.size - self.volume_tarinfo.volume_offset),
+                        "GNU.volume.offset": str(self.volume_tarinfo.volume_offset),
                     }
 
                     self.pax_headers.update(volume_info)
@@ -2585,11 +2524,12 @@ class TarFile(object):
                 # Extract directories with a safe mode.
                 directories.append(tarinfo)
                 tarinfo = copy.copy(tarinfo)
-                tarinfo.mode = 0700
-            self.extract(tarinfo, path)
+                tarinfo.mode = 0o0700
+            # Do not set_attrs directories, as we will do that further down
+            self.extract(tarinfo, path, set_attrs=not tarinfo.isdir())
 
         # Reverse sort directories.
-        directories.sort(key=operator.attrgetter('name'))
+        directories.sort(key=lambda a: a.name)
         directories.reverse()
 
         # Set correct owner, mtime and filemode on directories.
@@ -2599,21 +2539,22 @@ class TarFile(object):
                 self.chown(tarinfo, dirpath)
                 self.utime(tarinfo, dirpath)
                 self.chmod(tarinfo, dirpath)
-            except ExtractError, e:
+            except ExtractError as e:
                 if self.errorlevel > 1:
                     raise
                 else:
                     self._dbg(1, "tarfile: %s" % e)
 
-    def extract(self, member, path=""):
+    def extract(self, member, path="", set_attrs=True):
         """Extract a member from the archive to the current working directory,
            using its full name. Its file information is extracted as accurately
            as possible. `member' may be a filename or a TarInfo object. You can
-           specify a different directory using `path'.
+           specify a different directory using `path'. File attributes (owner,
+           mtime, mode) are set unless `set_attrs' is False.
         """
         self._check("r")
 
-        if isinstance(member, basestring):
+        if isinstance(member, str):
             tarinfo = self.getmember(member)
         else:
             tarinfo = member
@@ -2623,8 +2564,9 @@ class TarFile(object):
             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
 
         try:
-            self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
-        except EnvironmentError, e:
+            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
+                                 set_attrs=set_attrs)
+        except EnvironmentError as e:
             if self.errorlevel > 0:
                 raise
             else:
@@ -2632,7 +2574,7 @@ class TarFile(object):
                     self._dbg(1, "tarfile: %s" % e.strerror)
                 else:
                     self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
-        except ExtractError, e:
+        except ExtractError as e:
             if self.errorlevel > 1:
                 raise
             else:
@@ -2640,24 +2582,19 @@ class TarFile(object):
 
     def extractfile(self, member):
         """Extract a member from the archive as a file object. `member' may be
-           a filename or a TarInfo object. If `member' is a regular file, a
-           file-like object is returned. If `member' is a link, a file-like
-           object is constructed from the link's target. If `member' is none of
-           the above, None is returned.
-           The file-like object is read-only and provides the following
-           methods: read(), readline(), readlines(), seek() and tell()
+           a filename or a TarInfo object. If `member' is a regular file or a
+           link, an io.BufferedReader object is returned. Otherwise, None is
+           returned.
         """
         self._check("r")
 
-        if isinstance(member, basestring):
+        if isinstance(member, str):
             tarinfo = self.getmember(member)
         else:
             tarinfo = member
 
-        if tarinfo.isreg() or tarinfo.ismultivol():
-            return self.fileobject(self, tarinfo)
-
-        elif tarinfo.type not in SUPPORTED_TYPES:
+        if tarinfo.isreg() or tarinfo.ismultivol() or\
+            tarinfo.type not in SUPPORTED_TYPES:
             # If a member's type is unknown, it is treated as a
             # regular file.
             return self.fileobject(self, tarinfo)
@@ -2676,7 +2613,7 @@ class TarFile(object):
             # blkdev, etc.), return None instead of a file object.
             return None
 
-    def _extract_member(self, tarinfo, targetpath):
+    def _extract_member(self, tarinfo, targetpath, set_attrs=True):
         """Extract the TarInfo object tarinfo to a physical
            file called targetpath.
         """
@@ -2713,10 +2650,11 @@ class TarFile(object):
         else:
             self.makefile(tarinfo, targetpath)
 
-        self.chown(tarinfo, targetpath)
-        if not tarinfo.issym():
-            self.chmod(tarinfo, targetpath)
-            self.utime(tarinfo, targetpath)
+        if set_attrs:
+            self.chown(tarinfo, targetpath)
+            if not tarinfo.issym():
+                self.chmod(tarinfo, targetpath)
+                self.utime(tarinfo, targetpath)
 
     #--------------------------------------------------------------------------
     # Below are the different file methods. They are called via
@@ -2729,18 +2667,29 @@ class TarFile(object):
         try:
             # Use a safe mode for the directory, the real mode is set
             # later in _extract_member().
-            os.mkdir(targetpath, 0700)
-        except EnvironmentError, e:
-            if e.errno != errno.EEXIST:
-                raise
+            os.mkdir(targetpath, 0o0700)
+        except FileExistsError:
+            pass
 
     def makefile(self, tarinfo, targetpath):
         """Make a file called targetpath.
         """
-        source = self.extractfile(tarinfo)
+        source = self.fileobj
+        source.seek(tarinfo.offset_data)
         iterate = True
         target = bltn_open(targetpath, "wb")
 
+        if tarinfo.sparse is not None:
+            try:
+                for offset, size in tarinfo.sparse:
+                    target.seek(offset)
+                    copyfileobj(source, target, size)
+                target.seek(tarinfo.size)
+                target.truncate()
+            finally:
+                target.close()
+            return
+
         while iterate:
             iterate = False
             try:
@@ -2758,8 +2707,8 @@ class TarFile(object):
                 # to self.open_volume
                 self.volume_number += 1
                 self.new_volume_handler(self, self.base_name, self.volume_number)
-                tarinfo = self.next()
-                source = self.extractfile(tarinfo)
+                tarinfo = self.firstmember
+                source = self.fileobj
                 iterate = True
 
         target.close()
@@ -2801,23 +2750,21 @@ class TarFile(object):
           (platform limitation), we try to make a copy of the referenced file
           instead of a link.
         """
-        if hasattr(os, "symlink") and hasattr(os, "link"):
+        try:
             # For systems that support symbolic and hard links.
             if tarinfo.issym():
-                if os.path.lexists(targetpath):
-                    os.unlink(targetpath)
                 os.symlink(tarinfo.linkname, targetpath)
             else:
                 # See extract().
                 if os.path.exists(tarinfo._link_target):
-                    if os.path.lexists(targetpath):
-                        os.unlink(targetpath)
                     os.link(tarinfo._link_target, targetpath)
                 else:
-                    self._extract_member(self._find_link_target(tarinfo), targetpath)
-        else:
+                    self._extract_member(self._find_link_target(tarinfo),
+                                         targetpath)
+        except symlink_exception:
             try:
-                self._extract_member(self._find_link_target(tarinfo), targetpath)
+                self._extract_member(self._find_link_target(tarinfo),
+                                     targetpath)
             except KeyError:
                 raise ExtractError("unable to resolve link inside archive")
 
@@ -2838,9 +2785,8 @@ class TarFile(object):
                 if tarinfo.issym() and hasattr(os, "lchown"):
                     os.lchown(targetpath, u, g)
                 else:
-                    if sys.platform != "os2emx":
-                        os.chown(targetpath, u, g)
-            except EnvironmentError:
+                    os.chown(targetpath, u, g)
+            except OSError as e:
                 raise ExtractError("could not change owner")
 
     def chmod(self, tarinfo, targetpath):
@@ -2849,7 +2795,7 @@ class TarFile(object):
         if hasattr(os, 'chmod'):
             try:
                 os.chmod(targetpath, tarinfo.mode)
-            except EnvironmentError:
+            except OSError as e:
                 raise ExtractError("could not change mode")
 
     def utime(self, tarinfo, targetpath):
@@ -2859,7 +2805,7 @@ class TarFile(object):
             return
         try:
             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
-        except EnvironmentError:
+        except OSError as e:
             raise ExtractError("could not change modification time")
 
     #--------------------------------------------------------------------------
@@ -2874,21 +2820,18 @@ class TarFile(object):
             self.firstmember = None
             return m
 
-        # Read the next block, unless there's none
-        if isinstance(self.fileobj, _Stream) and self.offset < self.fileobj.pos:
-            return None
-        else:
-            self.fileobj.seek(self.offset)
+        # Read the next block.
+        self.fileobj.seek(self.offset)
         tarinfo = None
         while True:
             try:
                 tarinfo = self.tarinfo.fromtarfile(self)
-            except EOFHeaderError, e:
+            except EOFHeaderError as e:
                 if self.ignore_zeros:
                     self._dbg(2, "0x%X: %s" % (self.offset, e))
                     self.offset += BLOCKSIZE
                     continue
-            except InvalidHeaderError, e:
+            except InvalidHeaderError as e:
                 if self.ignore_zeros:
                     self._dbg(2, "0x%X: %s" % (self.offset, e))
                     self.offset += BLOCKSIZE
@@ -2898,10 +2841,10 @@ class TarFile(object):
             except EmptyHeaderError:
                 if self.offset == 0:
                     raise ReadError("empty file")
-            except TruncatedHeaderError, e:
+            except TruncatedHeaderError as e:
                 if self.offset == 0:
                     raise ReadError(str(e))
-            except SubsequentHeaderError, e:
+            except SubsequentHeaderError as e:
                 raise ReadError(str(e))
             break
 
@@ -2954,9 +2897,9 @@ class TarFile(object):
            corresponds to TarFile's mode.
         """
         if self.closed:
-            raise IOError("%s is closed" % self.__class__.__name__)
+            raise OSError("%s is closed" % self.__class__.__name__)
         if mode is not None and self.mode not in mode:
-            raise IOError("bad operation for mode %r" % self.mode)
+            raise OSError("bad operation for mode %r" % self.mode)
 
     def _find_link_target(self, tarinfo):
         """Find the target member of a symlink or hardlink member in the
@@ -2989,7 +2932,7 @@ class TarFile(object):
         """Write debugging output to sys.stderr.
         """
         if level <= self.debug:
-            print >> sys.stderr, msg
+            print(msg, file=sys.stderr)
 
     def __enter__(self):
         self._check()
@@ -3022,7 +2965,7 @@ class TarIter:
         """Return iterator object.
         """
         return self
-    def next(self):
+    def __next__(self):
         """Return the next item using TarFile's next() method.
            When all members have been read, set TarFile as _loaded.
         """
@@ -3045,102 +2988,6 @@ class TarIter:
 
         return tarinfo
 
-# Helper classes for sparse file support
-class _section:
-    """Base class for _data and _hole.
-    """
-    def __init__(self, offset, size):
-        self.offset = offset
-        self.size = size
-    def __contains__(self, offset):
-        return self.offset <= offset < self.offset + self.size
-
-class _data(_section):
-    """Represent a data section in a sparse file.
-    """
-    def __init__(self, offset, size, realpos):
-        _section.__init__(self, offset, size)
-        self.realpos = realpos
-
-class _hole(_section):
-    """Represent a hole section in a sparse file.
-    """
-    pass
-
-class _ringbuffer(list):
-    """Ringbuffer class which increases performance
-       over a regular list.
-    """
-    def __init__(self):
-        self.idx = 0
-    def find(self, offset):
-        idx = self.idx
-        while True:
-            item = self[idx]
-            if offset in item:
-                break
-            idx += 1
-            if idx == len(self):
-                idx = 0
-            if idx == self.idx:
-                # End of File
-                return None
-        self.idx = idx
-        return item
-
-#---------------------------------------------
-# zipfile compatible TarFile class
-#---------------------------------------------
-TAR_PLAIN = 0           # zipfile.ZIP_STORED
-TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
-class TarFileCompat:
-    """TarFile class compatible with standard module zipfile's
-       ZipFile class.
-    """
-    def __init__(self, file, mode="r", compression=TAR_PLAIN):
-        from warnings import warnpy3k
-        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
-                stacklevel=2)
-        if compression == TAR_PLAIN:
-            self.tarfile = TarFile.taropen(file, mode)
-        elif compression == TAR_GZIPPED:
-            self.tarfile = TarFile.gzopen(file, mode)
-        else:
-            raise ValueError("unknown compression constant")
-        if mode[0:1] == "r":
-            members = self.tarfile.getmembers()
-            for m in members:
-                m.filename = m.name
-                m.file_size = m.size
-                m.date_time = time.gmtime(m.mtime)[:6]
-    def namelist(self):
-        return map(lambda m: m.name, self.infolist())
-    def infolist(self):
-        return filter(lambda m: m.type in REGULAR_TYPES,
-                      self.tarfile.getmembers())
-    def printdir(self):
-        self.tarfile.list()
-    def testzip(self):
-        return
-    def getinfo(self, name):
-        return self.tarfile.getmember(name)
-    def read(self, name):
-        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
-    def write(self, filename, arcname=None, compress_type=None):
-        self.tarfile.add(filename, arcname)
-    def writestr(self, zinfo, bytes):
-        try:
-            from cStringIO import StringIO
-        except ImportError:
-            from StringIO import StringIO
-        import calendar
-        tinfo = TarInfo(zinfo.filename)
-        tinfo.size = len(bytes)
-        tinfo.mtime = calendar.timegm(zinfo.date_time)
-        self.tarfile.addfile(tinfo, StringIO(bytes))
-    def close(self):
-        self.tarfile.close()
-#class TarFileCompat
 
 #--------------------
 # exported functions
index 2bddd2a..1aa5c9a 100644 (file)
@@ -28,19 +28,19 @@ def split_file(separator, prefix, input_file, new_file_func=None):
     '''
     i = 0
     pos = 0
-    buf = ""
+    buf = b""
     sep_len = len(separator)
     if sep_len == 0:
         raise Exception("empty separator")
 
     if new_file_func is None:
-        new_file_func = lambda prefix, i: open(prefix + str(i), 'w')
+        new_file_func = lambda prefix, i: open(prefix + str(i), 'wb')
 
     output = new_file_func(prefix, i)
 
     # buffered search. we try not to have the while input file in memory, as
     # it's not needed
-    with open(input_file, 'r') as f:
+    with open(input_file, 'rb') as f:
         while True:
             buf += f.read(BUFSIZE)
             if len(buf) == 0:
@@ -71,13 +71,13 @@ def split_file(separator, prefix, input_file, new_file_func=None):
 
             # else: continue writing to the current output and iterate
             output.write(buf)
-            buf = ""
+            buf = b""
 
     output.close()
 
 def chunk_file(input_file, output_file, from_pos, to_pos):
-    ifl = open(input_file, 'r')
-    ofl = open(output_file, 'w')
+    ifl = open(input_file, 'rb')
+    ofl = open(output_file, 'wb')
 
     ifl.seek(from_pos)
     ofl.write(ifl.read(to_pos-from_pos))
@@ -99,4 +99,5 @@ if __name__ == "__main__":
         chunk_file(input_file=args.input_file, output_file=args.output,
                    from_pos=args.from_pos, to_pos=args.to_pos)
     else:
-        split_file(separator=args.separator, prefix=args.prefix, input_file=args.input_file)
+        split_file(separator=bytes(args.separator, 'UTF-8'), prefix=args.prefix,
+                   input_file=args.input_file)
index 9931f60..906fc27 100644 (file)
@@ -31,14 +31,14 @@ def rescue(tar_files, rescue_dir=None):
     format (compression, etc). Assumes it to be multivolume tar.
     '''
     # setup rescue_dir
-    if isinstance(tar_files, basestring):
+    if isinstance(tar_files, str):
         tar_files = [tar_files]
 
     if not isinstance(tar_files, list):
         raise Exception("tar_files must be a list")
 
     for f in tar_files:
-        if not isinstance(f, basestring):
+        if not isinstance(f, str):
             raise Exception("tar_files must be a list of strings")
         if not os.path.exists(f):
             raise Exception("tar file '%s' doesn't exist" % f)
@@ -71,12 +71,12 @@ def rescue(tar_files, rescue_dir=None):
             path = "%s.%d" %(prefix, context['num'])
             extract_files.append(path)
             context['num'] += 1
-            return open(path, 'w')
+            return open(path, 'wb')
         new_gz = partial(new_gz, context, extract_files)
 
         # split in compressed chunks
         for f in tar_files:
-            filesplit.split_file('\x1f\x8b',
+            filesplit.split_file(b'\x1f\x8b',
                 os.path.join(rescue_dir, base_name), f, new_gz)
 
     # includes volumes already extracted with new_volume_handler
old mode 100644 (file)
new mode 100755 (executable)
index 0df4650..0d7f99c
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Copyright (C) 2013 Intra2net AG
 #
index cd86323..dd27771 100644 (file)
@@ -42,9 +42,9 @@ class BaseTest(unittest.TestCase):
         file. File path and length are specified as function arguments.
         '''
         f = open(path, 'w')
-        s = string.lowercase + string.digits + "\n"
+        s = string.ascii_lowercase + string.digits + "\n"
         if len(s) < length:
-            s += s*(length/len(s))
+            s += s*int(length/len(s))
         data = s[:length]
         f.write(data)
         f.close()
index b2e1238..a594884 100644 (file)
@@ -93,7 +93,7 @@ class ConcatCompressTest(BaseTest):
         tarobj.close()
         os.unlink("big")
 
-        fo = open("sample.tar.gz", 'r')
+        fo = open("sample.tar.gz", 'rb')
         fo.seek(pos)
         tarobj = TarFile.open(mode="r#gz", fileobj=fo)
         tarobj.extract(tarobj.next())
@@ -130,7 +130,7 @@ class ConcatCompressTest(BaseTest):
         os.unlink("small2")
 
         # extract only the "small" file
-        fo = open("sample.tar.gz", 'r')
+        fo = open("sample.tar.gz", 'rb')
         fo.seek(pos)
         tarobj = TarFile.open(mode="r#gz", fileobj=fo)
         tarobj.extract(tarobj.next())
@@ -179,7 +179,7 @@ class ConcatCompressTest(BaseTest):
             tarobj.open_volume(volume_path)
 
         # extract only the "small" file
-        fo = open("sample.tar.gz", 'r')
+        fo = open("sample.tar.gz", 'rb')
         fo.seek(pos)
         tarobj = TarFile.open(mode="r#gz", fileobj=fo,
                               concat_compression=True,
@@ -226,7 +226,7 @@ class ConcatCompressTest(BaseTest):
         tarobj.extractall()
         tarobj.close()
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -263,7 +263,7 @@ class ConcatCompressTest(BaseTest):
         tarobj.extractall()
         tarobj.close()
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -305,7 +305,7 @@ class ConcatCompressTest(BaseTest):
         tarobj.close()
 
         # check output
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -337,7 +337,7 @@ class ConcatCompressTest(BaseTest):
         os.unlink("small")
         os.unlink("small2")
 
-        filesplit.split_file('\x1f\x8b', "sample.tar.gz.", "sample.tar.gz")
+        filesplit.split_file(b'\x1f\x8b', "sample.tar.gz.", "sample.tar.gz")
 
         assert os.path.exists("sample.tar.gz.0") # beginning of the tar file
         assert os.path.exists("sample.tar.gz.1") # first file
@@ -346,13 +346,13 @@ class ConcatCompressTest(BaseTest):
         assert not os.path.exists("sample.tar.gz.4") # nothing else
 
         # extract and check output
-        for i in xrange(1, 4):
+        for i in range(1, 4):
             tarobj = TarFile.open("sample.tar.gz.%d" % i,
                                 mode="r|gz")
             tarobj.extractall()
             tarobj.close()
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -395,11 +395,11 @@ class ConcatCompressTest(BaseTest):
         assert not os.path.exists("sample.tar.gz.4") # nothing else
 
         # extract and check output
-        for i in xrange(1, 4):
+        for i in range(1, 4):
             os.system("gzip -cd sample.tar.gz.%d > sample.%d.tar" % (i, i))
             os.system("tar xf sample.%d.tar" % i)
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -429,9 +429,9 @@ class ConcatCompressTest(BaseTest):
         assert os.path.exists("sample.tar.gz")
 
         # overwrite stuff in the middle of the big file
-        f = open('sample.tar.gz', 'r+b')
+        f = open('sample.tar.gz', 'r+b')
         f.seek(100)
-        f.write("breaking things")
+        f.write(bytes("breaking things", 'UTF-8'))
         f.close()
 
         os.unlink("big")
@@ -439,7 +439,7 @@ class ConcatCompressTest(BaseTest):
         os.unlink("small2")
 
         # equivalent to $ python filesplit.py -s $'\x1f\x8b' -p sample.tar.gz. sample.tar.gz
-        filesplit.split_file('\x1f\x8b', "sample.tar.gz.", "sample.tar.gz")
+        filesplit.split_file(b'\x1f\x8b', "sample.tar.gz.", "sample.tar.gz")
 
         assert os.path.exists("sample.tar.gz.0") # beginning of the tar file
         assert os.path.exists("sample.tar.gz.1") # first file
@@ -448,7 +448,7 @@ class ConcatCompressTest(BaseTest):
         assert not os.path.exists("sample.tar.gz.4") # nothing else
 
         # extract and check output
-        for i in xrange(1, 4):
+        for i in range(1, 4):
             try:
                 tarobj = TarFile.open("sample.tar.gz.%d" % i,
                                     mode="r|gz")
@@ -460,7 +460,7 @@ class ConcatCompressTest(BaseTest):
                 else:
                     raise Exception("Error extracting a tar.gz not related to the broken 'big' file")
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             if key != "big":
                 assert os.path.exists(key)
                 assert value == self.md5sum(key)
index b5c6802..7c6e73f 100644 (file)
@@ -87,7 +87,7 @@ class DeltaTarTest(BaseTest):
         deltatar.restore_backup(target_path="source_dir",
                                 backup_tar_path=tar_path)
 
-        for key, value in self.hash.iteritems():
+        for key, value in self.hash.items():
             assert os.path.exists(key)
             if value:
                 assert value == self.md5sum(key)
@@ -108,19 +108,22 @@ class DeltaTarTest(BaseTest):
         index_filename = deltatar.index_name_func(True)
         index_path = os.path.join("backup_dir", index_filename)
 
-        f = open(index_path, 'r')
+        f = open(index_path, 'rb')
         crc = None
         checked = False
         began_list = False
-        for l in f.readline():
-            if 'BEGIN-FILE-LIST' in l:
+        while True:
+            l = f.readline()
+            if l == b'':
+                break
+            if b'BEGIN-FILE-LIST' in l:
                 crc = binascii.crc32(l)
                 began_list = True
-            elif 'END-FILE-LIST' in l:
+            elif b'END-FILE-LIST' in l:
                 crc = binascii.crc32(l, crc) & 0xffffffff
 
                 # next line contains the crc
-                data = json.loads(f.readline())
+                data = json.loads(f.readline().decode("UTF-8"))
                 assert data['type'] == 'file-list-checksum'
                 assert data['checksum'] == crc
                 checked = True
@@ -164,7 +167,7 @@ class DeltaTarTest(BaseTest):
         deltatar.restore_backup(target_path="source_dir2",
                                 backup_tar_path=tar_path)
 
-        for key, value in self.hash.iteritems():
+        for key, value in self.hash.items():
             assert os.path.exists(key)
             if value:
                 assert value == self.md5sum(key)
@@ -244,13 +247,13 @@ class DeltaTarTest(BaseTest):
             l = f.readline()
             if not len(l):
                 break
-            data = json.loads(l)
+            data = json.loads(l.decode('UTF-8'))
             if data.get('type', '') == 'file' and\
                     deltatar.unprefixed(data['path']) == "./huge":
                 offset = data['offset']
                 break
 
-        fo = open(tar_path, 'r')
+        fo = open(tar_path, 'rb')
         fo.seek(offset)
         def new_volume_handler(mode, tarobj, base_name, volume_number):
             tarobj.open_volume(datetime.now().strftime(
@@ -296,7 +299,7 @@ class DeltaTarTest(BaseTest):
         deltatar.restore_backup(target_path="source_dir",
             backup_indexes_paths=[index_path])
 
-        for key, value in self.hash.iteritems():
+        for key, value in self.hash.items():
             assert os.path.exists(key)
             if value:
                 assert value == self.md5sum(key)
@@ -327,7 +330,7 @@ class DeltaTarTest(BaseTest):
         deltatar.restore_backup(target_path="source_dir",
             backup_indexes_paths=[index_path])
 
-        for key, value in self.hash.iteritems():
+        for key, value in self.hash.items():
             assert os.path.exists(key)
             if value:
                 assert value == self.md5sum(key)
@@ -967,7 +970,7 @@ class DeltaTarTest(BaseTest):
         # create source_dir with the small file, that will be then deleted by
         # the restore_backup
         os.mkdir("source_dir")
-        open("source_dir/small", 'w').close()
+        open("source_dir/small", 'wb').close()
 
         tar_filename = deltatar.volume_name_func('backup_dir2',
             is_full=False, volume_number=0)
@@ -1221,19 +1224,19 @@ class DeltaTarTest(BaseTest):
         target_it = deltatar.jsonize_path_iterator(target_it, strip=1)
         while True:
             try:
-                sitem = source_it.next()
-                titem = target_it.next()
+                sitem = next(source_it)
+                titem = next(target_it)
             except StopIteration:
                 try:
-                    titem = target_it.next()
+                    titem = next(target_it)
                     raise Exception("iterators do not stop at the same time")
                 except StopIteration:
                     break
             try:
                 assert deltatar._equal_stat_dicts(sitem, titem)
-            except Exception, e:
-                print sitem
-                print titem
+            except Exception as e:
+                print(sitem)
+                print(titem)
                 raise e
 
 class DeltaTar2Test(DeltaTarTest):
index 03b597a..432acdf 100644 (file)
@@ -50,7 +50,7 @@ class EncryptionTest(BaseTest):
         os.unlink("big")
 
         # extract with normal tar and check output
-        filesplit.split_file('Salted__', "sample.tar.gz.aes.", "sample.tar.gz.aes128")
+        filesplit.split_file(b'Salted__', "sample.tar.gz.aes.", "sample.tar.gz.aes128")
 
         assert os.path.exists("sample.tar.gz.aes.0") # beginning of the tar file
         assert os.path.exists("sample.tar.gz.aes.1") # first file
@@ -89,7 +89,7 @@ class EncryptionTest(BaseTest):
             os.unlink(k)
 
         # extract with normal tar and check output
-        filesplit.split_file('Salted__', "sample.tar.gz.aes.", "sample.tar.gz.aes128")
+        filesplit.split_file(b'Salted__', "sample.tar.gz.aes.", "sample.tar.gz.aes128")
 
         assert os.path.exists("sample.tar.gz.aes.0") # beginning of the tar file
         assert os.path.exists("sample.tar.gz.aes.1") # first file
@@ -98,13 +98,13 @@ class EncryptionTest(BaseTest):
         assert not os.path.exists("sample.tar.gz.aes.4") # nothing else
 
         # extract and check output
-        for i in xrange(1, 4):
+        for i in range(1, 4):
             fname = "sample.tar.gz.aes.%d" % i
             os.system("openssl aes-128-cbc -nopad -k 'key' -d -in %s -out sample.tar.gz" % fname)
             os.system("zcat sample.tar.gz 2>/dev/null > sample.tar")
             os.system("tar xf sample.tar")
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -170,7 +170,7 @@ class EncryptionTest(BaseTest):
         tarobj.extractall()
         tarobj.close()
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -211,7 +211,7 @@ class EncryptionTest(BaseTest):
         tarobj.close()
 
         # check output
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -252,7 +252,7 @@ class EncryptionTest(BaseTest):
         tarobj.close()
 
         # check output
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -279,7 +279,7 @@ class EncryptionTest(BaseTest):
         os.unlink("big")
 
         # extract with normal tar and check output
-        filesplit.split_file('Salted__', "sample.tar.gz.aes.", "sample.tar.gz.aes256")
+        filesplit.split_file(b'Salted__', "sample.tar.gz.aes.", "sample.tar.gz.aes256")
 
         assert os.path.exists("sample.tar.gz.aes.0") # beginning of the tar file
         assert os.path.exists("sample.tar.gz.aes.1") # first file
index b08656d..0389139 100644 (file)
@@ -164,7 +164,7 @@ class MultivolGnuFormatTest(BaseTest):
 
         # extract with normal tar and check output
         os.system("tar xfM sample.tar --file=sample.tar.1 --file=sample.tar.2")
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -250,7 +250,7 @@ class MultivolGnuFormatTest(BaseTest):
         assert os.path.exists("sample.tar.1")
         assert not os.path.exists("sample.tar.2")
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             os.unlink(key)
             assert not os.path.exists(key)
 
@@ -261,7 +261,7 @@ class MultivolGnuFormatTest(BaseTest):
         tarobj.extractall()
         tarobj.close()
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -304,7 +304,7 @@ class MultivolGnuFormatTest(BaseTest):
         tarobj.extractall()
         tarobj.close()
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -344,7 +344,7 @@ class MultivolGnuFormatTest(BaseTest):
         tarobj.extractall()
         tarobj.close()
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -474,7 +474,7 @@ class MultivolGnuFormatTest(BaseTest):
         assert os.path.exists("sample.tar")
         assert not os.path.exists("sample.tar.1")
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             os.unlink(key)
             assert not os.path.exists(key)
 
@@ -485,7 +485,7 @@ class MultivolGnuFormatTest(BaseTest):
         tarobj.extractall()
         tarobj.close()
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -515,7 +515,7 @@ class MultivolGnuFormatTest(BaseTest):
         assert os.path.exists("sample.tar.1")
         assert not os.path.exists("sample.tar.2")
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             os.unlink(key)
             assert not os.path.exists(key)
 
@@ -526,7 +526,7 @@ class MultivolGnuFormatTest(BaseTest):
         tarobj.extractall()
         tarobj.close()
 
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
index e9c48eb..cb0764f 100644 (file)
@@ -61,7 +61,7 @@ class RescueTarTest(BaseTest):
         rescue_tar.rescue("sample.tar.gz")
 
         # check output
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             assert os.path.exists(key)
             assert value == self.md5sum(key)
 
@@ -95,7 +95,7 @@ class RescueTarTest(BaseTest):
         # overwrite stuff in the middle of the big file
         f = open('sample.tar.gz', 'r+b')
         f.seek(100)
-        f.write("breaking things")
+        f.write(bytes("breaking things", "UTF-8"))
         f.close()
 
         os.unlink("big")
@@ -107,7 +107,7 @@ class RescueTarTest(BaseTest):
         rescue_tar.rescue("sample.tar.gz")
 
         # check output
-        for key, value in hash.iteritems():
+        for key, value in hash.items():
             if key == "big":
                 continue
             assert os.path.exists(key)