From be60ffd0598fec172eccb69f3c6a8433af6fb643 Mon Sep 17 00:00:00 2001 From: Eduardo Robles Elvira Date: Mon, 4 Nov 2013 08:50:55 +0100 Subject: [PATCH] initial port to python 3, not finished --- deltatar/aescrypto.py | 81 ++- deltatar/deltatar.py | 96 ++-- deltatar/tarfile.py | 1443 +++++++++++++++++---------------------- filesplit.py | 15 +- rescue_tar.py | 8 +- runtests.py | 2 +- testing/__init__.py | 4 +- testing/test_concat_compress.py | 32 +- testing/test_deltatar.py | 39 +- testing/test_encryption.py | 16 +- testing/test_multivol.py | 18 +- testing/test_rescue_tar.py | 6 +- 12 files changed, 813 insertions(+), 947 deletions(-) mode change 100644 => 100755 runtests.py diff --git a/deltatar/aescrypto.py b/deltatar/aescrypto.py index bb8cbf8..2dce10c 100644 --- a/deltatar/aescrypto.py +++ b/deltatar/aescrypto.py @@ -1,20 +1,30 @@ -#!/usr/bin/env python - +#!/usr/bin/env python3 +#------------------------------------------------------------------- +# aescrypto.py +#------------------------------------------------------------------- # Copyright (C) 2013 Intra2net AG +# All rights reserved. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published -# by the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. 
+# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: # -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. # -# You should have received a copy of the GNU General Public License -# along with this program. If not, see -# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
# Author: Daniel Garcia @@ -30,11 +40,14 @@ from hashlib import md5 # we ignore the PowmInsecureWarning warning given by libgmp4 because it doesn't # affect our code import warnings -from Crypto.pct_warnings import PowmInsecureWarning -warnings.simplefilter("ignore", PowmInsecureWarning) +try: + from Crypto.pct_warnings import PowmInsecureWarning + warnings.simplefilter("ignore", PowmInsecureWarning) -from Crypto.Cipher import AES -from Crypto import Random + from Crypto.Cipher import AES + from Crypto import Random +except: + pass class AESCrypt: @@ -42,17 +55,19 @@ class AESCrypt: This class provides a simple method to encrypt and decrypt text using AES. ''' - def __init__(self, password, salt='', key_length=128): + def __init__(self, password, salt=b'', key_length=128): self.bs = AES.block_size self.mode = AES.MODE_CBC if key_length not in [128, 256]: raise Exception('Invalid key_length, only 128 and 256 allowed') - self.key_length = key_length/8 - self.buf = '' + self.key_length = int(key_length/8) + self.buf = b'' if salt: self.salt = salt else: - self.salt = Random.new().read(self.bs - len('Salted__')) + self.salt = Random.new().read(self.bs - len(b'Salted__')) + if isinstance(password, str): + password = bytes(password, 'UTF-8') self.password = password self.get_pad = self.get_random_pad @@ -68,7 +83,7 @@ class AESCrypt: ''' self.derive_key_and_iv() self.cipher = AES.new(self.key, self.mode, self.iv) - self.salt_str = 'Salted__' + self.salt + self.salt_str = b'Salted__' + self.salt def close_enc(self): ''' @@ -78,7 +93,7 @@ class AESCrypt: returns the encrypted text ''' chunk = self.buf - self.buf = '' + self.buf = b'' need_padding = len(chunk) % self.bs != 0 padding_length = self.bs - len(chunk) % self.bs chunk += self.get_pad(padding_length) @@ -94,10 +109,10 @@ class AESCrypt: self.buf += chunk if len(self.buf) % self.bs == 0: cipher = self.cipher.encrypt(self.buf) - self.buf = '' + self.buf = b'' return cipher - cipher = '' + cipher = b'' while 
len(self.buf) >= self.bs: chunk = self.buf[:self.bs] self.buf = self.buf[self.bs:] @@ -128,19 +143,19 @@ class AESCrypt: ''' Calculates the salt for an input encrypted file ''' - self.salt = instream.read(self.bs)[len('Salted__'):] + self.salt = instream.read(self.bs)[len(b'Salted__'):] def get_salt_str(self, instr): ''' Calculates the salt for an input encrypted string ''' - self.salt = instr[len('Salted__'):self.bs] + self.salt = instr[len(b'Salted__'):self.bs] def derive_key_and_iv(self): ''' Generates the key and iv using the password and salt as seed ''' - d = d_i = '' + d = d_i = b'' l = self.key_length + self.bs while len(d) < l: d_i = md5(d_i + self.password + self.salt).digest() @@ -152,19 +167,19 @@ class AESCrypt: ''' Returns an ISO_10126 pad, which is random ''' - return Random.new().read(padding_length - 1) + chr(padding_length) + return Random.new().read(padding_length - 1) + bytes([padding_length]) def split_random_pad(self, chunk): ''' Returns the chunk without the ISO_10126 pad ''' - return chunk[:-ord(chunk[-1])] + return chunk[:-chunk[-1]] def get_pkcs5_pad(self, padding_length): ''' Returns the PKCS pad ''' - return padding_length * chr(padding_length) + return padding_length * bytes([padding_length]) def split_pkcs5_pad(self, chunk): ''' @@ -195,7 +210,7 @@ def decrypt(in_file, out_file, password): salt = aes.get_salt(in_file) aes.init() - next_chunk = '' + next_chunk = b'' finished = False while not finished: buf = in_file.read(1024 * aes.bs) @@ -215,4 +230,4 @@ if __name__ == '__main__': cipher.seek(0) decrypt(cipher, out, 'key') out.seek(0) - print out.read() + print(out.read()) diff --git a/deltatar/deltatar.py b/deltatar/deltatar.py index 1875df5..b19afa8 100644 --- a/deltatar/deltatar.py +++ b/deltatar/deltatar.py @@ -277,7 +277,7 @@ class DeltaTar(object): match = NO_MATCH for i in self.included_files: # it can be either a regexp or a string - if isinstance(i, basestring): + if isinstance(i, str): # if the string matches, then continue 
if i == path: match = MATCH @@ -322,7 +322,7 @@ class DeltaTar(object): if match != PARENT_MATCH: for e in self.excluded_files: # it can be either a regexp or a string - if isinstance(e, basestring): + if isinstance(e, str): # if the string matches, then exclude if e == path: return NO_MATCH @@ -404,7 +404,7 @@ class DeltaTar(object): return { u'type': ptype, - u'path': unicode(path), + u'path': path, u'mode': mode, u'mtime': int(stinfo.st_mtime), u'ctime': int(stinfo.st_ctime), @@ -506,10 +506,10 @@ class DeltaTar(object): backup in volumes. Optional (won't split in volumes by default). ''' # check input - if not isinstance(source_path, basestring): + if not isinstance(source_path, str): raise Exception('Source path must be a string') - if not isinstance(backup_path, basestring): + if not isinstance(backup_path, str): raise Exception('Backup path must be a string') if not os.path.exists(source_path) or not os.path.isdir(source_path): @@ -575,9 +575,9 @@ class DeltaTar(object): # wraps some args from context into the handler new_volume_handler = partial(new_volume_handler, self, cwd, backup_path) - index_fd.write('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full" }\n') + index_fd.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full" }\n', 'UTF-8')) - s = '{"type": "BEGIN-FILE-LIST"}\n' + s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8') # calculate checksum and write into the stream crc = binascii.crc32(s) index_fd.write(s) @@ -609,15 +609,15 @@ class DeltaTar(object): statd['offset'] = tarobj.get_last_member_offset() # store in the index the stat dict - s = json.dumps(statd) + '\n' + s = bytes(json.dumps(statd) + '\n', 'UTF-8') crc = binascii.crc32(s, crc) & 0xffffffff index_fd.write(s) - s = '{"type": "END-FILE-LIST"}\n' + s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8') crc = binascii.crc32(s, crc) & 0xffffffff index_fd.write(s) - index_fd.write('{"type": "file-list-checksum", "checksum": %d}\n' %\ - crc) + s 
= bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8') + index_fd.write(s) index_fd.close() os.chdir(cwd) tarobj.close() @@ -640,10 +640,10 @@ class DeltaTar(object): the index_mode setup in the constructor. ''' # check/sanitize input - if not isinstance(source_path, basestring): + if not isinstance(source_path, str): raise Exception('Source path must be a string') - if not isinstance(backup_path, basestring): + if not isinstance(backup_path, str): raise Exception('Backup path must be a string') if not os.path.exists(source_path) or not os.path.isdir(source_path): @@ -659,7 +659,7 @@ class DeltaTar(object): if max_volume_size != None: max_volume_size = max_volume_size*1024*1024 - if not isinstance(previous_index_path, basestring): + if not isinstance(previous_index_path, str): raise Exception('previous_index_path must be A string') if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path): @@ -721,9 +721,9 @@ class DeltaTar(object): # wraps some args from context into the handler new_volume_handler = partial(new_volume_handler, self, cwd, backup_path) - index_fd.write('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff" }\n') + index_fd.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff" }\n', 'UTF-8')) - s = '{"type": "BEGIN-FILE-LIST"}\n' + s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8') # calculate checksum and write into the stream crc = binascii.crc32(s) index_fd.write(s) @@ -784,7 +784,7 @@ class DeltaTar(object): if action == 'snapshot': # calculate stat dict for current file stat = dpath.copy() - stat['path'] = u"snapshot://" + dpath['path'] + stat['path'] = "snapshot://" + dpath['path'] stat['volume'] = self.vol_no # backup file @@ -794,7 +794,7 @@ class DeltaTar(object): stat['offset'] = tarobj.get_last_member_offset() # store in the index the stat dict - s = json.dumps(stat) + '\n' + s = bytes(json.dumps(stat) + '\n', 'UTF-8') crc = binascii.crc32(s, 
crc) & 0xffffffff index_fd.write(s) elif action == 'delete': @@ -807,7 +807,7 @@ class DeltaTar(object): tarobj.add("/dev/null", arcname=stat['path']) # store in the index the stat dict - s = json.dumps(stat) + '\n' + s = bytes(json.dumps(stat) + '\n', 'UTF-8') crc = binascii.crc32(s, crc) & 0xffffffff index_fd.write(s) elif action == 'list': @@ -816,15 +816,15 @@ class DeltaTar(object): # unchanged files do not enter in the backup, only in the index # store in the index the stat dict - s = json.dumps(stat) + '\n' + s = bytes(json.dumps(stat) + '\n', 'UTF-8') crc = binascii.crc32(s, crc) & 0xffffffff index_fd.write(s) - s = '{"type": "END-FILE-LIST"}\n' + s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8') crc = binascii.crc32(s, crc) & 0xffffffff index_fd.write(s) - index_fd.write('{"type": "file-list-checksum", "checksum": %d}\n' %\ - crc) + s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8') + index_fd.write(s) index_fd.close() index_it.release() os.chdir(cwd) @@ -879,13 +879,13 @@ class DeltaTar(object): self.f.close() self.f = None - def next(self): + def __next__(self): # read each file in the index and process it to do the retore j = {} l_no = -1 try: j, l_no = self.delta_tar._parse_json_line(self.f, l_no) - except Exception, e: + except Exception as e: if self.f: self.f.close() raise e @@ -903,7 +903,7 @@ class DeltaTar(object): self.delta_tar.logger.warn('unrecognized type to be ' 'restored: %s, line %d' % (op_type, l_no)) # iterate again - return self.next() + return self.__next__() return j, l_no @@ -951,11 +951,11 @@ class DeltaTar(object): self.tar_obj.close() self.tar_obj = None - def next(self): + def __next__(self): ''' Read each member and return it as a stat dict ''' - tarinfo = self.tar_obj.next() + tarinfo = self.tar_obj.__iter__().__next__() if not tarinfo or tarinfo == self.last_member: raise StopIteration @@ -993,7 +993,7 @@ class DeltaTar(object): ''' while True: try: - path = iter.next() + path = iter.__next__() if 
strip == 0: yield self._stat_dict(path) else: @@ -1016,7 +1016,7 @@ class DeltaTar(object): while True: if not elem1: try: - elem1, l_no = it1.next() + elem1, l_no = it1.__next__() except StopIteration: if elem2: yield (None, elem2, l_no) @@ -1027,7 +1027,7 @@ class DeltaTar(object): break if not elem2: try: - elem2 = it2.next() + elem2 = it2.__next__() if isinstance(elem2, tuple): elem2 = elem2[0] except StopIteration: @@ -1075,7 +1075,7 @@ class DeltaTar(object): return (index1, index2) def list_backup(self, backup_tar_path, list_func=None): - if not isinstance(backup_tar_path, basestring): + if not isinstance(backup_tar_path, str): raise Exception('Backup tar path must be a string') if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path): @@ -1114,7 +1114,7 @@ class DeltaTar(object): def filter(cls, list_func, tarinfo): if list_func is None: - print tarinfo.path + print(tarinfo.path) else: list_func(tarinfo) return False @@ -1148,7 +1148,7 @@ class DeltaTar(object): specified in the constructor. 
''' # check/sanitize input - if not isinstance(target_path, basestring): + if not isinstance(target_path, str): raise Exception('Target path must be a string') if backup_indexes_paths is None and backup_tar_path == []: @@ -1160,7 +1160,7 @@ class DeltaTar(object): mode = "diff" if mode == "tar": - if not isinstance(backup_tar_path, basestring): + if not isinstance(backup_tar_path, str): raise Exception('Backup tar path must be a string') if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path): @@ -1179,7 +1179,7 @@ class DeltaTar(object): ' the open mode you provided is "%s"' % self.mode) for index in backup_indexes_paths: - if not isinstance(index, basestring): + if not isinstance(index, str): raise Exception('indices must be strings') if not os.path.exists(index) or not os.path.isfile(index): @@ -1238,8 +1238,8 @@ class DeltaTar(object): continue try: helper.restore(ipath, l_no) - except Exception, e: - print "FAILED to restore: ", ipath.get('path', '') + except Exception as e: + print("FAILED to restore: ", ipath.get('path', '')) continue # if both files are equal, we have nothing to restore @@ -1273,8 +1273,8 @@ class DeltaTar(object): l = f.readline() l_no += 1 try: - j = json.loads(l) - except ValueError, e: + j = json.loads(l.decode('UTF-8')) + except ValueError as e: raise Exception("error parsing this json line " "(line number %d): %s" % (l_no, l)) return j, l_no @@ -1467,7 +1467,7 @@ class RestoreHelper(object): # found in one index and we have to go to the next index. 
if data['iterator'] is None: it = data['iterator'] = self._deltatar.iterate_index_path(data["path"]) - d, l_no = it.next() + d, l_no = it.__next__() else: it = data['iterator'] d = data['last_itelement'] @@ -1490,7 +1490,7 @@ class RestoreHelper(object): return None, 0, '' try: - d, l_no = it.next() + d, l_no = it.__next__() except StopIteration: data['last_itelement'] = d data['last_lno'] = l_no @@ -1533,7 +1533,7 @@ class RestoreHelper(object): except EnvironmentError: raise tarfile.ExtractError("could not change owner") - except tarfile.ExtractError, e: + except tarfile.ExtractError as e: self._deltatar.logger.warn('tarfile: %s' % e) @staticmethod @@ -1576,7 +1576,7 @@ class RestoreHelper(object): vol_path = os.path.join(backup_path, vol_name) if index_data['vol_fd']: index_data['vol_fd'].close() - index_data['vol_fd'] = open(vol_path, 'r') + index_data['vol_fd'] = open(vol_path, 'rb') # force reopen of the tarobj because of new volume if index_data['tarobj']: @@ -1586,7 +1586,7 @@ class RestoreHelper(object): # seek tarfile if needed offset = file_data.get('offset', -1) if index_data['tarobj']: - member = index_data['tarobj'].next() + member = index_data['tarobj'].__iter__().__next__() if not member or member.path != file_data['path']: # force a seek and reopen index_data['tarobj'].close() @@ -1603,7 +1603,7 @@ class RestoreHelper(object): new_volume_handler=index_data['new_volume_handler'], save_to_members=False) - member = index_data['tarobj'].next() + member = index_data['tarobj'].__iter__().__next__() member.path = unprefixed_path member.name = unprefixed_path @@ -1611,7 +1611,7 @@ class RestoreHelper(object): if op_type == 'directory': self.add_member_dir(member) member = copy.copy(member) - member.mode = 0700 + member.mode = 0o0700 # if it's an existing directory, we then don't need to recreate it # just set the right permissions, mtime and that kind of stuff @@ -1640,5 +1640,5 @@ class RestoreHelper(object): class DirItem(object): def __init__(self, 
**kwargs): - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): setattr(self, k, v) \ No newline at end of file diff --git a/deltatar/tarfile.py b/deltatar/tarfile.py index 5dcfe9d..da65708 100644 --- a/deltatar/tarfile.py +++ b/deltatar/tarfile.py @@ -1,5 +1,4 @@ -#!/usr/bin/env python -# -*- coding: iso-8859-1 -*- +#!/usr/bin/env python3 #------------------------------------------------------------------- # tarfile.py #------------------------------------------------------------------- @@ -44,6 +43,7 @@ __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend, Eduardo Robl #--------- import sys import os +import io import shutil import stat import errno @@ -53,48 +53,59 @@ import copy import re import operator -import aescrypto +from . import aescrypto try: import grp, pwd except ImportError: grp = pwd = None +# os.symlink on Windows prior to 6.0 raises NotImplementedError +symlink_exception = (AttributeError, NotImplementedError) +try: + # OSError (winerror=1314) will be raised if the caller does not hold the + # SeCreateSymbolicLinkPrivilege privilege + symlink_exception += (OSError,) +except NameError: + pass + # from tarfile import * __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] +from builtins import open as _open # Since 'open' is TarFile.open + #--------------------------------------------------------- # tar constants #--------------------------------------------------------- -NUL = "\0" # the null character +NUL = b"\0" # the null character BLOCKSIZE = 512 # length of processing blocks RECORDSIZE = BLOCKSIZE * 20 # length of records -GNU_MAGIC = "ustar \0" # magic gnu tar string -POSIX_MAGIC = "ustar\x0000" # magic posix tar string +GNU_MAGIC = b"ustar \0" # magic gnu tar string +POSIX_MAGIC = b"ustar\x0000" # magic posix tar string LENGTH_NAME = 100 # maximum length of a filename LENGTH_LINK = 100 # maximum length of a linkname LENGTH_PREFIX = 155 # maximum length of the prefix field -REGTYPE = "0" # regular file -AREGTYPE 
= "\0" # regular file -LNKTYPE = "1" # link (inside tarfile) -SYMTYPE = "2" # symbolic link -CHRTYPE = "3" # character special device -BLKTYPE = "4" # block special device -DIRTYPE = "5" # directory -FIFOTYPE = "6" # fifo special device -CONTTYPE = "7" # contiguous file - -GNUTYPE_LONGNAME = "L" # GNU tar longname -GNUTYPE_LONGLINK = "K" # GNU tar longlink -GNUTYPE_SPARSE = "S" # GNU tar sparse file -GNUTYPE_MULTIVOL = "M" # GNU tar continuation of a file that began on +REGTYPE = b"0" # regular file +AREGTYPE = b"\0" # regular file +LNKTYPE = b"1" # link (inside tarfile) +SYMTYPE = b"2" # symbolic link +CHRTYPE = b"3" # character special device +BLKTYPE = b"4" # block special device +DIRTYPE = b"5" # directory +FIFOTYPE = b"6" # fifo special device +CONTTYPE = b"7" # contiguous file + +GNUTYPE_LONGNAME = b"L" # GNU tar longname +GNUTYPE_LONGLINK = b"K" # GNU tar longlink +GNUTYPE_SPARSE = b"S" # GNU tar sparse file +GNUTYPE_MULTIVOL = b"M" # GNU tar continuation of a file that began on # another volume -XHDTYPE = "x" # POSIX.1-2001 extended header -XGLTYPE = "g" # POSIX.1-2001 global header -SOLARIS_XHDTYPE = "X" # Solaris extended header +XHDTYPE = b"x" # POSIX.1-2001 extended header +XGLTYPE = b"g" # POSIX.1-2001 global header +SOLARIS_XHDTYPE = b"X" # Solaris extended header USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format GNU_FORMAT = 1 # GNU tar format @@ -123,6 +134,9 @@ GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, PAX_FIELDS = ("path", "linkpath", "size", "mtime", "uid", "gid", "uname", "gname") +# Fields from a pax header that are affected by hdrcharset. +PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"} + # Fields in a pax header that are numbers, all other fields # are treated as strings. PAX_NUMBER_FIELDS = { @@ -135,69 +149,57 @@ PAX_NUMBER_FIELDS = { } #--------------------------------------------------------- -# Bits used in the mode field, values in octal. 
-#--------------------------------------------------------- -S_IFLNK = 0120000 # symbolic link -S_IFREG = 0100000 # regular file -S_IFBLK = 0060000 # block device -S_IFDIR = 0040000 # directory -S_IFCHR = 0020000 # character device -S_IFIFO = 0010000 # fifo - -TSUID = 04000 # set UID on execution -TSGID = 02000 # set GID on execution -TSVTX = 01000 # reserved - -TUREAD = 0400 # read by owner -TUWRITE = 0200 # write by owner -TUEXEC = 0100 # execute/search by owner -TGREAD = 0040 # read by group -TGWRITE = 0020 # write by group -TGEXEC = 0010 # execute/search by group -TOREAD = 0004 # read by other -TOWRITE = 0002 # write by other -TOEXEC = 0001 # execute/search by other - -#--------------------------------------------------------- # initialization #--------------------------------------------------------- -ENCODING = sys.getfilesystemencoding() -if ENCODING is None: - ENCODING = sys.getdefaultencoding() + +if os.name in ("nt", "ce"): + ENCODING = "utf-8" +else: + ENCODING = sys.getfilesystemencoding() #--------------------------------------------------------- # Some useful functions #--------------------------------------------------------- -def stn(s, length): - """Convert a python string to a null-terminated string buffer. +def stn(s, length, encoding, errors): + """Convert a string to a null-terminated bytes object. """ + s = s.encode(encoding, errors) return s[:length] + (length - len(s)) * NUL -def nts(s): - """Convert a null-terminated string field to a python string. +def nts(s, encoding, errors): + """Convert a null-terminated bytes object to a string. """ - # Use the string up to the first null char. - p = s.find("\0") - if p == -1: - return s - return s[:p] + p = s.find(b"\0") + if p != -1: + s = s[:p] + return s.decode(encoding, errors) + +def sbtn(s, length, encoding, errors): + """Convert a string or a bunch of bytes to a null-terminated bytes object + of specific size. 
+ """ + if isinstance(s, str): + s = s.encode(encoding, errors) + return s[:length] + (length - len(s)) * NUL def nti(s): """Convert a number field to a python number. """ # There are two possible encodings for a number field, see # itn() below. - if s[0] != chr(0200): + if s[0] in (0o200, 0o377): + n = 0 + for i in range(len(s) - 1): + n <<= 8 + n += s[i + 1] + if s[0] == 0o377: + n = -(256 ** (len(s) - 1) - n) + else: try: - n = int(nts(s) or "0", 8) + n = int(nts(s, "ascii", "strict") or "0", 8) except ValueError: raise InvalidHeaderError("invalid header") - else: - n = 0L - for i in xrange(len(s) - 1): - n <<= 8 - n += ord(s[i + 1]) return n def itn(n, digits=8, format=DEFAULT_FORMAT): @@ -206,46 +208,27 @@ def itn(n, digits=8, format=DEFAULT_FORMAT): # POSIX 1003.1-1988 requires numbers to be encoded as a string of # octal digits followed by a null-byte, this allows values up to # (8**(digits-1))-1. GNU tar allows storing numbers greater than - # that if necessary. A leading 0200 byte indicates this particular - # encoding, the following digits-1 bytes are a big-endian - # representation. This allows values up to (256**(digits-1))-1. + # that if necessary. A leading 0o200 or 0o377 byte indicate this + # particular encoding, the following digits-1 bytes are a big-endian + # base-256 representation. This allows values up to (256**(digits-1))-1. + # A 0o200 byte indicates a positive number, a 0o377 byte a negative + # number. if 0 <= n < 8 ** (digits - 1): - s = "%0*o" % (digits - 1, n) + NUL - else: - if format != GNU_FORMAT or n >= 256 ** (digits - 1): - raise ValueError("overflow in number field") - - if n < 0: - # XXX We mimic GNU tar's behaviour with negative numbers, - # this could raise OverflowError. 
- n = struct.unpack("L", struct.pack("l", n))[0] + s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL + elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1): + if n >= 0: + s = bytearray([0o200]) + else: + s = bytearray([0o377]) + n = 256 ** digits + n - s = "" - for i in xrange(digits - 1): - s = chr(n & 0377) + s + for i in range(digits - 1): + s.insert(1, n & 0o377) n >>= 8 - s = chr(0200) + s - return s - -def uts(s, encoding, errors): - """Convert a unicode object to a string. - """ - if errors == "utf-8": - # An extra error handler similar to the -o invalid=UTF-8 option - # in POSIX.1-2001. Replace untranslatable characters with their - # UTF-8 representation. - try: - return s.encode(encoding, "strict") - except UnicodeEncodeError: - x = [] - for c in s: - try: - x.append(c.encode(encoding, "strict")) - except UnicodeEncodeError: - x.append(c.encode("utf8")) - return "".join(x) else: - return s.encode(encoding, errors) + raise ValueError("overflow in number field") + + return s def calc_chksums(buf): """Calculate the checksum for a member's header by summing up all @@ -256,8 +239,8 @@ def calc_chksums(buf): the high bit set. So we calculate two checksums, unsigned and signed. 
""" - unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512])) - signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512])) + unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf)) + signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf)) return unsigned_chksum, signed_chksum def copyfileobj(src, dst, length=None): @@ -272,60 +255,25 @@ def copyfileobj(src, dst, length=None): BUFSIZE = 16 * 1024 blocks, remainder = divmod(length, BUFSIZE) - for b in xrange(blocks): + for b in range(blocks): buf = src.read(BUFSIZE) dst.write(buf) if len(buf) < BUFSIZE: - raise IOError("end of file reached") + raise OSError("end of file reached") if remainder != 0: buf = src.read(remainder) dst.write(buf) if len(buf) < remainder: - raise IOError("end of file reached") + raise OSError("end of file reached") return -filemode_table = ( - ((S_IFLNK, "l"), - (S_IFREG, "-"), - (S_IFBLK, "b"), - (S_IFDIR, "d"), - (S_IFCHR, "c"), - (S_IFIFO, "p")), - - ((TUREAD, "r"),), - ((TUWRITE, "w"),), - ((TUEXEC|TSUID, "s"), - (TSUID, "S"), - (TUEXEC, "x")), - - ((TGREAD, "r"),), - ((TGWRITE, "w"),), - ((TGEXEC|TSGID, "s"), - (TSGID, "S"), - (TGEXEC, "x")), - - ((TOREAD, "r"),), - ((TOWRITE, "w"),), - ((TOEXEC|TSVTX, "t"), - (TSVTX, "T"), - (TOEXEC, "x")) -) - def filemode(mode): - """Convert a file's mode to a string of the form - -rwxrwxrwx. 
- Used by TarFile.list() - """ - perm = [] - for table in filemode_table: - for bit, char in table: - if mode & bit == bit: - perm.append(char) - break - else: - perm.append("-") - return "".join(perm) + """Deprecated in this location; use stat.filemode.""" + import warnings + warnings.warn("deprecated in favor of stat.filemode", + DeprecationWarning, 2) + return stat.filemode(mode) class TarError(Exception): """Base exception.""" @@ -334,7 +282,7 @@ class ExtractError(TarError): """General exception for extract errors.""" pass class ReadError(TarError): - """Exception for unreadble tar archives.""" + """Exception for unreadable tar archives.""" pass class CompressionError(TarError): """Exception for unavailable compression methods.""" @@ -377,7 +325,7 @@ class _LowLevelFile: }[mode] if hasattr(os, "O_BINARY"): _mode |= os.O_BINARY - self.fd = os.open(name, _mode, 0666) + self.fd = os.open(name, _mode, 0o666) self.offset = 0 def close(self): @@ -427,52 +375,77 @@ class _Stream: self.comptype = comptype self.fileobj = fileobj self.bufsize = bufsize - self.buf = "" - self.pos = 0L - self.concat_pos = 0L + self.buf = b"" + self.pos = 0 + self.concat_pos = 0 self.closed = False - self.flags = 0L - self.internal_pos = 0L + self.flags = 0 + self.internal_pos = 0 self.concat_stream = concat_stream self.enctype = enctype self.key_length = key_length self.password = password - self.last_block_offset = 0L - self.dbuf = "" - self.aes_buf = "" + self.last_block_offset = 0 + self.dbuf = b"" + self.aes_buf = b"" + self.exception = None - if comptype == "gz": - try: - import zlib - except ImportError: - raise CompressionError("zlib module is not available") - self.zlib = zlib - if mode == "r": - if self.enctype == 'aes': - self.encryption = aescrypto.AESCrypt(self.password, - key_length=self.key_length) - self._init_read_gz() - else: - self._init_write_gz() - self.crc = zlib.crc32("") & 0xffffffffL + try: + if comptype == "gz": + try: + import zlib + except ImportError: + raise 
CompressionError("zlib module is not available") + self.zlib = zlib + if mode == "r": + if self.enctype == 'aes': + self.encryption = aescrypto.AESCrypt(self.password, + key_length=self.key_length) + self._init_read_gz() + self.exception = zlib.error + else: + self._init_write_gz() + self.crc = zlib.crc32(b"") - elif comptype == "bz2": - try: - import bz2 - except ImportError: - raise CompressionError("bz2 module is not available") - if mode == "r": - self.dbuf = "" - self.cmp = bz2.BZ2Decompressor() - else: - self.cmp = bz2.BZ2Compressor() + elif comptype == "bz2": + try: + import bz2 + except ImportError: + raise CompressionError("bz2 module is not available") + if mode == "r": + self.dbuf = b"" + self.cmp = bz2.BZ2Decompressor() + self.exception = OSError + else: + self.cmp = bz2.BZ2Compressor() - elif self.enctype == 'aes': - self.encryption = aescrypto.AESCrypt(self.password, - key_length=self.key_length) - if mode != "r": - self.encryption.init() - self.__write_to_file(self.encryption.salt_str) + elif comptype == 'xz': + try: + import lzma + except ImportError: + raise CompressionError("lzma module is not available") + if mode == "r": + self.dbuf = b"" + self.cmp = lzma.LZMADecompressor() + self.exception = lzma.LZMAError + else: + self.cmp = lzma.LZMACompressor() + + elif self.enctype == 'aes': + self.encryption = aescrypto.AESCrypt(self.password, + key_length=self.key_length) + if mode != "r": + self.encryption.init() + self.__write_to_file(self.encryption.salt_str) + + elif comptype != "tar": + raise CompressionError("unknown compression type %r" % comptype) + + except: + if not self._extfileobj: + self.fileobj.close() + self.closed = True + raise def __del__(self): if hasattr(self, "closed") and not self.closed: @@ -493,14 +466,15 @@ class _Stream: self.encryption.init() self.__write_to_file(self.encryption.salt_str) - timestamp = struct.pack("= 0: blocks, remainder = divmod(pos - self.pos, self.bufsize) - for i in xrange(blocks): + for i in 
range(blocks): self.read(self.bufsize) self.read(remainder) else: @@ -702,8 +676,8 @@ class _Stream: """Reads just one line, new line character included """ # if \n in dbuf, no read neads to be done - if '\n' in self.dbuf: - pos = self.dbuf.index('\n') + 1 + if b'\n' in self.dbuf: + pos = self.dbuf.index(b'\n') + 1 ret = self.dbuf[:pos] self.dbuf = self.dbuf[pos:] return ret @@ -714,14 +688,14 @@ class _Stream: # nothing more to read, so return the buffer if not chunk: - return ''.join(buf) + return b''.join(buf) buf.append(chunk) # if \n found, return the new line - if '\n' in chunk: - dbuf = ''.join(buf) - pos = dbuf.index('\n') + 1 + if b'\n' in chunk: + dbuf = b''.join(buf) + pos = dbuf.index(b'\n') + 1 self.dbuf = dbuf[pos:] + self.dbuf return dbuf[:pos] @@ -740,14 +714,17 @@ class _Stream: buf = self.cmp.decompress(buf) except IOError: raise ReadError("invalid compressed data") - except Exception, e: + except Exception as e: # happens at the end of the file # _init_read_gz failed in the previous iteration so # sel.cmp.descompress fails here - pass + if self.concat_stream: + pass + else: + raise ReadError("invalid compressed data") if self.comptype == "gz" and hasattr(self, "crc"): - self.crc = self.zlib.crc32(buf, self.crc) & 0xffffffffL + self.crc = self.zlib.crc32(buf, self.crc) if self.concat_stream and len(self.cmp.unused_data) != 0: self.buf = self.cmp.unused_data + self.buf self.close(close_fileobj=False) @@ -756,11 +733,11 @@ class _Stream: except: # happens at the end of the file pass - self.crc = self.zlib.crc32("") & 0xffffffffL + self.crc = self.zlib.crc32(b"") self.closed = False t.append(buf) c += len(buf) - t = "".join(t) + t = b"".join(t) self.dbuf = t[size:] return t[:size] @@ -777,7 +754,7 @@ class _Stream: t.append(buf) c += len(buf) self.internal_pos += len(buf) - t = "".join(t) + t = b"".join(t) self.buf = t[size:] return t[:size] @@ -815,15 +792,15 @@ class _Stream: chars because the file is decrypted in multiples of the key size. 
''' if self.enctype == 'aes': - kl = self.key_length/8 + kl = int(self.key_length/8) buf = self.fileobj.read(size - kl) last = len(buf) < (size - kl) buf = self.aes_buf + buf - self.aes_buf = "" + self.aes_buf = b"" # prevent setting last to False when it shouldn't if not last: - last = buf[-kl:].startswith('Salted__') + last = buf[-kl:].startswith(b'Salted__') self.aes_buf = buf[-kl:] buf = buf[:-kl] @@ -837,7 +814,7 @@ class _Stream: return buf try: - idx = buf.index('Salted__') + idx = buf.index(b'Salted__') except ValueError: buf = self.encryption.decrypt(buf, last) else: @@ -846,7 +823,7 @@ class _Stream: if b1: buf = self.encryption.decrypt(b1, True) else: - buf = '' + buf = b'' self.encryption.get_salt_str(b2) self.encryption.init() @@ -869,78 +846,19 @@ class _StreamProxy(object): return self.buf def getcomptype(self): - if self.buf.startswith("\037\213\010"): + if self.buf.startswith(b"\x1f\x8b\x08"): return "gz" - if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY": + elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY": return "bz2" - return "tar" + elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")): + return "xz" + else: + return "tar" def close(self): self.fileobj.close() # class StreamProxy -class _BZ2Proxy(object): - """Small proxy class that enables external file object - support for "r:bz2" and "w:bz2" modes. This is actually - a workaround for a limitation in bz2 module's BZ2File - class which (unlike gzip.GzipFile) has no support for - a file object argument. 
- """ - - blocksize = 16 * 1024 - - def __init__(self, fileobj, mode): - self.fileobj = fileobj - self.mode = mode - self.name = getattr(self.fileobj, "name", None) - self.init() - - def init(self): - import bz2 - self.pos = 0 - if self.mode == "r": - self.bz2obj = bz2.BZ2Decompressor() - self.fileobj.seek(0) - self.buf = "" - else: - self.bz2obj = bz2.BZ2Compressor() - - def read(self, size): - b = [self.buf] - x = len(self.buf) - while x < size: - raw = self.fileobj.read(self.blocksize) - if not raw: - break - data = self.bz2obj.decompress(raw) - b.append(data) - x += len(data) - self.buf = "".join(b) - - buf = self.buf[:size] - self.buf = self.buf[size:] - self.pos += len(buf) - return buf - - def seek(self, pos): - if pos < self.pos: - self.init() - self.read(pos - self.pos) - - def tell(self): - return self.pos - - def write(self, data): - self.pos += len(data) - raw = self.bz2obj.compress(data) - self.fileobj.write(raw) - - def close(self): - if self.mode == "w": - raw = self.bz2obj.flush() - self.fileobj.write(raw) -# class _BZ2Proxy - #------------------------ # Extraction file object #------------------------ @@ -950,22 +868,63 @@ class _FileInFile(object): object. """ - def __init__(self, fileobj, offset, size, sparse=None): + def __init__(self, fileobj, offset, size, blockinfo=None): self.fileobj = fileobj self.offset = offset self.size = size - self.sparse = sparse self.position = 0 + self.name = getattr(fileobj, "name", None) + self.closed = False + + if blockinfo is None: + blockinfo = [(0, size)] + + # Construct a map with data and zero blocks. 
+ self.map_index = 0 + self.map = [] + lastpos = 0 + realpos = self.offset + for offset, size in blockinfo: + if offset > lastpos: + self.map.append((False, lastpos, offset, None)) + self.map.append((True, offset, offset + size, realpos)) + realpos += size + lastpos = offset + size + if lastpos < self.size: + self.map.append((False, lastpos, self.size, None)) + + def flush(self): + pass + + def readable(self): + return True + + def writable(self): + return False + + def seekable(self): + return self.fileobj.seekable() def tell(self): """Return the current file position. """ return self.position - def seek(self, position): + def seek(self, position, whence=io.SEEK_SET): """Seek to a position in the file. """ - self.position = position + if whence == io.SEEK_SET: + self.position = min(max(position, 0), self.size) + elif whence == io.SEEK_CUR: + if position < 0: + self.position = max(self.position + position, 0) + else: + self.position = min(self.position + position, self.size) + elif whence == io.SEEK_END: + self.position = max(min(self.size + position, self.size), 0) + else: + raise ValueError("Invalid argument") + return self.position def read(self, size=None): """Read data from the file. @@ -975,177 +934,42 @@ class _FileInFile(object): else: size = min(size, self.size - self.position) - if self.sparse is None: - return self.readnormal(size) - else: - return self.readsparse(size) - - def readnormal(self, size): - """Read operation for regular files. - """ - self.fileobj.seek(self.offset + self.position) - self.position += size - return self.fileobj.read(size) - - def readsparse(self, size): - """Read operation for sparse files. - """ - data = [] + buf = b"" while size > 0: - buf = self.readsparsesection(size) - if not buf: - break - size -= len(buf) - data.append(buf) - return "".join(data) - - def readsparsesection(self, size): - """Read a single section of a sparse file. 
- """ - section = self.sparse.find(self.position) - - if section is None: - return "" - - size = min(size, section.offset + section.size - self.position) - - if isinstance(section, _data): - realpos = section.realpos + self.position - section.offset - self.fileobj.seek(self.offset + realpos) - self.position += size - return self.fileobj.read(size) - else: - self.position += size - return NUL * size -#class _FileInFile - - -class ExFileObject(object): - """File-like object for reading an archive member. - Is returned by TarFile.extractfile(). - """ - blocksize = 1024 - - def __init__(self, tarfile, tarinfo): - self.fileobj = _FileInFile(tarfile.fileobj, - tarinfo.offset_data, - tarinfo.size, - getattr(tarinfo, "sparse", None)) - self.name = tarinfo.name - self.mode = "r" - self.closed = False - self.size = tarinfo.size - - self.position = 0 - self.buffer = "" - - def read(self, size=None): - """Read at most size bytes from the file. If size is not - present or None, read all data until EOF is reached. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - buf = "" - if self.buffer: - if size is None: - buf = self.buffer - self.buffer = "" - else: - buf = self.buffer[:size] - self.buffer = self.buffer[size:] - - if size is None: - buf += self.fileobj.read() - else: - buf += self.fileobj.read(size - len(buf)) - - self.position += len(buf) - return buf - - def readline(self, size=-1): - """Read one entire line from the file. If size is present - and non-negative, return a string with at most that - size, which may be an incomplete line. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - if "\n" in self.buffer: - pos = self.buffer.find("\n") + 1 - else: - buffers = [self.buffer] while True: - buf = self.fileobj.read(self.blocksize) - buffers.append(buf) - if not buf or "\n" in buf: - self.buffer = "".join(buffers) - pos = self.buffer.find("\n") + 1 - if pos == 0: - # no newline found. 
- pos = len(self.buffer) + data, start, stop, offset = self.map[self.map_index] + if start <= self.position < stop: break - - if size != -1: - pos = min(size, pos) - - buf = self.buffer[:pos] - self.buffer = self.buffer[pos:] - self.position += len(buf) - return buf - - def readlines(self): - """Return a list with all remaining lines. - """ - result = [] - while True: - line = self.readline() - if not line: break - result.append(line) - return result - - def tell(self): - """Return the current file position. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - return self.position - - def seek(self, pos, whence=os.SEEK_SET): - """Seek to a position in the file. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - if whence == os.SEEK_SET: - self.position = min(max(pos, 0), self.size) - elif whence == os.SEEK_CUR: - if pos < 0: - self.position = max(self.position + pos, 0) + else: + self.map_index += 1 + if self.map_index == len(self.map): + self.map_index = 0 + length = min(size, stop - self.position) + if data: + self.fileobj.seek(offset + (self.position - start)) + buf += self.fileobj.read(length) else: - self.position = min(self.position + pos, self.size) - elif whence == os.SEEK_END: - self.position = max(min(self.size + pos, self.size), 0) - else: - raise ValueError("Invalid argument") + buf += NUL * length + size -= length + self.position += length + return buf - self.buffer = "" - self.fileobj.seek(self.position) + def readinto(self, b): + buf = self.read(len(b)) + b[:len(buf)] = buf + return len(buf) def close(self): - """Close the file object. - """ self.closed = True +#class _FileInFile - def __iter__(self): - """Get an iterator over the file's lines. 
- """ - while True: - line = self.readline() - if not line: - break - yield line + +class ExFileObject(io.BufferedReader): + + def __init__(self, tarfile, tarinfo): + fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data, + tarinfo.size, tarinfo.sparse) + super().__init__(fileobj) #class ExFileObject #------------------ @@ -1159,12 +983,18 @@ class TarInfo(object): usually created internally. """ + __slots__ = ("name", "mode", "uid", "gid", "size", "mtime", + "chksum", "type", "linkname", "uname", "gname", + "devmajor", "devminor", "volume_offset", + "offset", "offset_data", "pax_headers", "sparse", + "tarfile", "_sparse_structs", "_link_target") + def __init__(self, name=""): """Construct a TarInfo object. name is the optional name of the member. """ self.name = name # member name - self.mode = 0644 # file permissions + self.mode = 0o644 # file permissions self.uid = 0 # user id self.gid = 0 # group id self.size = 0 # file size @@ -1182,6 +1012,7 @@ class TarInfo(object): self.volume_offset = 0 # the file's data corresponds with the data # starting at this position + self.sparse = None # sparse member information self.pax_headers = {} # pax header information # In pax headers the "name" and "linkname" field are called @@ -1201,12 +1032,12 @@ class TarInfo(object): def __repr__(self): return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) - def get_info(self, encoding, errors): + def get_info(self, encoding=None, errors=None): """Return the TarInfo's attributes as a dictionary. 
""" info = { "name": self.name, - "mode": self.mode & 07777, + "mode": self.mode & 0o7777, "uid": self.uid, "gid": self.gid, "size": self.size, @@ -1225,27 +1056,24 @@ class TarInfo(object): if info["type"] == DIRTYPE and not info["name"].endswith("/"): info["name"] += "/" - for key in ("name", "linkname", "uname", "gname"): - if type(info[key]) is unicode: - info[key] = info[key].encode(encoding, errors) - return info - def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"): + def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, + errors="surrogateescape"): """Return a tar header as a string of 512 byte blocks. """ info = self.get_info(encoding, errors) if format == USTAR_FORMAT: - return self.create_ustar_header(info) + return self.create_ustar_header(info, encoding, errors) elif format == GNU_FORMAT: - return self.create_gnu_header(info) + return self.create_gnu_header(info, encoding, errors) elif format == PAX_FORMAT: return self.create_pax_header(info, encoding, errors) else: raise ValueError("invalid format") - def create_ustar_header(self, info): + def create_ustar_header(self, info, encoding, errors): """Return the object as a ustar header block. """ info["magic"] = POSIX_MAGIC @@ -1256,9 +1084,9 @@ class TarInfo(object): if len(info["name"]) > LENGTH_NAME: info["prefix"], info["name"] = self._posix_split_name(info["name"]) - return self._create_header(info, USTAR_FORMAT) + return self._create_header(info, USTAR_FORMAT, encoding, errors) - def create_gnu_header(self, info): + def create_gnu_header(self, info, encoding, errors): """Return the object as a GNU header block sequence. 
""" info["magic"] = GNU_MAGIC @@ -1270,17 +1098,19 @@ class TarInfo(object): itn(self.volume_offset, 12, GNU_FORMAT), itn(0, 119, GNU_FORMAT), # stuff unused in this tar implementation, set to zero ] - info['prefix'] = "".join(prefix) + info['prefix'] = b"".join(prefix) info['size'] = info['size'] - self.volume_offset - buf = "" + buf = b"" if len(info["linkname"]) > LENGTH_LINK: - buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK) + buf += self._create_gnu_long_header(info["linkname"], + GNUTYPE_LONGLINK, encoding, errors) if len(info["name"]) > LENGTH_NAME: - buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME) + buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, + encoding, errors) - return buf + self._create_header(info, GNU_FORMAT) + return buf + self._create_header(info, GNU_FORMAT, encoding, errors) def create_pax_header(self, info, encoding, errors): """Return the object as a ustar header block. If it cannot be @@ -1304,17 +1134,15 @@ class TarInfo(object): # The pax header has priority. continue - val = info[name].decode(encoding, errors) - # Try to encode the string as ASCII. try: - val.encode("ascii") + info[name].encode("ascii", "strict") except UnicodeEncodeError: - pax_headers[hname] = val + pax_headers[hname] = info[name] continue if len(info[name]) > length: - pax_headers[hname] = val + pax_headers[hname] = info[name] # Test number fields for values that exceed the field limit or values # that like to be stored as float. @@ -1326,22 +1154,22 @@ class TarInfo(object): val = info[name] if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): - pax_headers[name] = unicode(val) + pax_headers[name] = str(val) info[name] = 0 # Create a pax extended header if necessary. 
if pax_headers: - buf = self._create_pax_generic_header(pax_headers) + buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding) else: - buf = "" + buf = b"" - return buf + self._create_header(info, USTAR_FORMAT) + return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace") @classmethod def create_pax_global_header(cls, pax_headers): """Return the object as a pax global header block sequence. """ - return cls._create_pax_generic_header(pax_headers, type=XGLTYPE) + return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8") def _posix_split_name(self, name): """Split a name longer than 100 chars into a prefix @@ -1359,31 +1187,31 @@ class TarInfo(object): return prefix, name @staticmethod - def _create_header(info, format): + def _create_header(info, format, encoding, errors): """Return a header block. info is a dictionary with file information, format must be one of the *_FORMAT constants. """ parts = [ - stn(info.get("name", ""), 100), - itn(info.get("mode", 0) & 07777, 8, format), + stn(info.get("name", ""), 100, encoding, errors), + itn(info.get("mode", 0) & 0o7777, 8, format), itn(info.get("uid", 0), 8, format), itn(info.get("gid", 0), 8, format), itn(info.get("size", 0), 12, format), itn(info.get("mtime", 0), 12, format), - " ", # checksum field + b" ", # checksum field info.get("type", REGTYPE), - stn(info.get("linkname", ""), 100), - stn(info.get("magic", POSIX_MAGIC), 8), - stn(info.get("uname", ""), 32), - stn(info.get("gname", ""), 32), + stn(info.get("linkname", ""), 100, encoding, errors), + info.get("magic", POSIX_MAGIC), + stn(info.get("uname", ""), 32, encoding, errors), + stn(info.get("gname", ""), 32, encoding, errors), itn(info.get("devmajor", 0), 8, format), itn(info.get("devminor", 0), 8, format), - stn(info.get("prefix", ""), 155) + sbtn(info.get("prefix", ""), 155, encoding, errors) ] - buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts)) + buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts)) chksum = 
calc_chksums(buf[-BLOCKSIZE:])[0] - buf = buf[:-364] + "%06o\0" % chksum + buf[-357:] + buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:] return buf @staticmethod @@ -1397,11 +1225,11 @@ class TarInfo(object): return payload @classmethod - def _create_gnu_long_header(cls, name, type): + def _create_gnu_long_header(cls, name, type, encoding, errors): """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence for name. """ - name += NUL + name = name.encode(encoding, errors) + NUL info = {} info["name"] = "././@LongLink" @@ -1410,19 +1238,39 @@ class TarInfo(object): info["magic"] = GNU_MAGIC # create extended header + name blocks. - return cls._create_header(info, USTAR_FORMAT) + \ + return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \ cls._create_payload(name) @classmethod - def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE): - """Return a POSIX.1-2001 extended or global header sequence + def _create_pax_generic_header(cls, pax_headers, type, encoding): + """Return a POSIX.1-2008 extended or global header sequence that contains a list of keyword, value pairs. The values - must be unicode objects. + must be strings. """ - records = [] - for keyword, value in pax_headers.iteritems(): - keyword = keyword.encode("utf8") - value = value.encode("utf8") + # Check if one of the fields contains surrogate characters and thereby + # forces hdrcharset=BINARY, see _proc_pax() for more information. + binary = False + for keyword, value in pax_headers.items(): + try: + value.encode("utf-8", "strict") + except UnicodeEncodeError: + binary = True + break + + records = b"" + if binary: + # Put the hdrcharset field at the beginning of the header. + records += b"21 hdrcharset=BINARY\n" + + for keyword, value in pax_headers.items(): + keyword = keyword.encode("utf-8") + if binary: + # Try to restore the original byte representation of `value'. + # Needless to say, that the encoding must match the string. 
+ value = value.encode(encoding, "surrogateescape") + else: + value = value.encode("utf-8") + l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' n = p = 0 while True: @@ -1430,8 +1278,7 @@ class TarInfo(object): if n == p: break p = n - records.append("%d %s=%s\n" % (p, keyword, value)) - records = "".join(records) + records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n" # We use a hardcoded "././@PaxHeader" name like star does # instead of the one that POSIX recommends. @@ -1442,12 +1289,12 @@ class TarInfo(object): info["magic"] = POSIX_MAGIC # Create pax header + record blocks. - return cls._create_header(info, USTAR_FORMAT) + \ + return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \ cls._create_payload(records) @classmethod - def frombuf(cls, buf): - """Construct a TarInfo object from a 512 byte string buffer. + def frombuf(cls, buf, encoding, errors): + """Construct a TarInfo object from a 512 byte bytes object. """ if len(buf) == 0: raise EmptyHeaderError("empty header") @@ -1461,8 +1308,7 @@ class TarInfo(object): raise InvalidHeaderError("bad checksum") obj = cls() - obj.buf = buf - obj.name = nts(buf[0:100]) + obj.name = nts(buf[0:100], encoding, errors) obj.mode = nti(buf[100:108]) obj.uid = nti(buf[108:116]) obj.gid = nti(buf[116:124]) @@ -1470,12 +1316,30 @@ class TarInfo(object): obj.mtime = nti(buf[136:148]) obj.chksum = chksum obj.type = buf[156:157] - obj.linkname = nts(buf[157:257]) - obj.uname = nts(buf[265:297]) - obj.gname = nts(buf[297:329]) + obj.linkname = nts(buf[157:257], encoding, errors) + obj.uname = nts(buf[265:297], encoding, errors) + obj.gname = nts(buf[297:329], encoding, errors) obj.devmajor = nti(buf[329:337]) obj.devminor = nti(buf[337:345]) - prefix = nts(buf[345:500]) + prefix = nts(buf[345:500], encoding, errors) + + # The old GNU sparse format occupies some of the unused + # space in the buffer for up to 4 sparse structures. + # Save the them for later processing in _proc_sparse(). 
+ if obj.type == GNUTYPE_SPARSE: + pos = 386 + structs = [] + for i in range(4): + try: + offset = nti(buf[pos:pos + 12]) + numbytes = nti(buf[pos + 12:pos + 24]) + except ValueError: + break + structs.append((offset, numbytes)) + pos += 24 + isextended = bool(buf[482]) + origsize = nti(buf[483:495]) + obj._sparse_structs = (structs, isextended, origsize) # Old V7 tar format represents a directory as a regular # file with a trailing slash. @@ -1499,7 +1363,7 @@ class TarInfo(object): tarfile. """ buf = tarfile.fileobj.read(BLOCKSIZE) - obj = cls.frombuf(buf) + obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors) obj.offset = tarfile.fileobj.tell() - BLOCKSIZE return obj._proc_member(tarfile) @@ -1560,71 +1424,43 @@ class TarInfo(object): # the longname information. next.offset = self.offset if self.type == GNUTYPE_LONGNAME: - next.name = nts(buf) + next.name = nts(buf, tarfile.encoding, tarfile.errors) elif self.type == GNUTYPE_LONGLINK: - next.linkname = nts(buf) + next.linkname = nts(buf, tarfile.encoding, tarfile.errors) return next def _proc_sparse(self, tarfile): """Process a GNU sparse header plus extra headers. """ - buf = self.buf - sp = _ringbuffer() - pos = 386 - lastpos = 0L - realpos = 0L - # There are 4 possible sparse structs in the - # first header. - for i in xrange(4): - try: - offset = nti(buf[pos:pos + 12]) - numbytes = nti(buf[pos + 12:pos + 24]) - except ValueError: - break - if offset > lastpos: - sp.append(_hole(lastpos, offset - lastpos)) - sp.append(_data(offset, numbytes, realpos)) - realpos += numbytes - lastpos = offset + numbytes - pos += 24 - - isextended = ord(buf[482]) - origsize = nti(buf[483:495]) - - # If the isextended flag is given, - # there are extra headers to process. - while isextended == 1: + # We already collected some sparse structures in frombuf(). + structs, isextended, origsize = self._sparse_structs + del self._sparse_structs + + # Collect sparse structures from extended header blocks. 
+ while isextended: buf = tarfile.fileobj.read(BLOCKSIZE) pos = 0 - for i in xrange(21): + for i in range(21): try: offset = nti(buf[pos:pos + 12]) numbytes = nti(buf[pos + 12:pos + 24]) except ValueError: break - if offset > lastpos: - sp.append(_hole(lastpos, offset - lastpos)) - sp.append(_data(offset, numbytes, realpos)) - realpos += numbytes - lastpos = offset + numbytes + if offset and numbytes: + structs.append((offset, numbytes)) pos += 24 - isextended = ord(buf[504]) - - if lastpos < origsize: - sp.append(_hole(lastpos, origsize - lastpos)) - - self.sparse = sp + isextended = bool(buf[504]) + self.sparse = structs self.offset_data = tarfile.fileobj.tell() tarfile.offset = self.offset_data + self._block(self.size) self.size = origsize - return self def _proc_pax(self, tarfile): """Process an extended or global header as described in - POSIX.1-2001. + POSIX.1-2008. """ # Read the header information. buf = tarfile.fileobj.read(self._block(self.size)) @@ -1637,11 +1473,29 @@ class TarInfo(object): else: pax_headers = tarfile.pax_headers.copy() + # Check if the pax header contains a hdrcharset field. This tells us + # the encoding of the path, linkpath, uname and gname fields. Normally, + # these fields are UTF-8 encoded but since POSIX.1-2008 tar + # implementations are allowed to store them as raw binary strings if + # the translation to UTF-8 fails. + match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) + if match is not None: + pax_headers["hdrcharset"] = match.group(1).decode("utf-8") + + # For the time being, we don't care about anything other than "BINARY". + # The only other value that is currently allowed by the standard is + # "ISO-IR 10646 2000 UTF-8" in other words UTF-8. + hdrcharset = pax_headers.get("hdrcharset") + if hdrcharset == "BINARY": + encoding = tarfile.encoding + else: + encoding = "utf-8" + # Parse pax header information. A record looks like that: # "%d %s=%s\n" % (length, keyword, value). 
length is the size # of the complete record including the length field itself and # the newline. keyword and value are both UTF-8 encoded strings. - regex = re.compile(r"(\d+) ([^=]+)=", re.U) + regex = re.compile(br"(\d+) ([^=]+)=") pos = 0 while True: match = regex.match(buf, pos) @@ -1652,8 +1506,21 @@ class TarInfo(object): length = int(length) value = buf[match.end(2) + 1:match.start(1) + length - 1] - keyword = keyword.decode("utf8") - value = value.decode("utf8") + # Normally, we could just use "utf-8" as the encoding and "strict" + # as the error handler, but we better not take the risk. For + # example, GNU tar <= 1.23 is known to store filenames it cannot + # translate to UTF-8 as raw strings (unfortunately without a + # hdrcharset=BINARY header). + # We first try the strict standard encoding, and if that fails we + # fall back on the user's encoding and error handler. + keyword = self._decode_pax_field(keyword, "utf-8", "utf-8", + tarfile.errors) + if keyword in PAX_NAME_FIELDS: + value = self._decode_pax_field(value, encoding, tarfile.encoding, + tarfile.errors) + else: + value = self._decode_pax_field(value, "utf-8", "utf-8", + tarfile.errors) pax_headers[keyword] = value pos += length @@ -1665,6 +1532,19 @@ class TarInfo(object): except HeaderError: raise SubsequentHeaderError("missing or bad subsequent header") + # Process GNU sparse information. + if "GNU.sparse.map" in pax_headers: + # GNU extended sparse format version 0.1. + self._proc_gnusparse_01(next, pax_headers) + + elif "GNU.sparse.size" in pax_headers: + # GNU extended sparse format version 0.0. + self._proc_gnusparse_00(next, pax_headers, buf) + + elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0": + # GNU extended sparse format version 1.0. + self._proc_gnusparse_10(next, pax_headers, tarfile) + if self.type in (XHDTYPE, SOLARIS_XHDTYPE): # Patch the TarInfo object with the extended header info. 
next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors) @@ -1693,29 +1573,70 @@ class TarInfo(object): return next - def _apply_pax_info(self, pax_headers, encoding, errors): - """Replace fields with supplemental information from a previous - pax extended or global header. + def _proc_gnusparse_00(self, next, pax_headers, buf): + """Process a GNU tar extended sparse header, version 0.0. """ - for keyword, value in pax_headers.iteritems(): - if keyword not in PAX_FIELDS: - continue + offsets = [] + for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): + offsets.append(int(match.group(1))) + numbytes = [] + for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): + numbytes.append(int(match.group(1))) + next.sparse = list(zip(offsets, numbytes)) - if keyword == "path": - value = value.rstrip("/") + def _proc_gnusparse_01(self, next, pax_headers): + """Process a GNU tar extended sparse header, version 0.1. + """ + sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")] + next.sparse = list(zip(sparse[::2], sparse[1::2])) - if keyword in PAX_NUMBER_FIELDS: - try: - value = PAX_NUMBER_FIELDS[keyword](value) - except ValueError: - value = 0 - else: - value = uts(value, encoding, errors) + def _proc_gnusparse_10(self, next, pax_headers, tarfile): + """Process a GNU tar extended sparse header, version 1.0. + """ + fields = None + sparse = [] + buf = tarfile.fileobj.read(BLOCKSIZE) + fields, buf = buf.split(b"\n", 1) + fields = int(fields) + while len(sparse) < fields * 2: + if b"\n" not in buf: + buf += tarfile.fileobj.read(BLOCKSIZE) + number, buf = buf.split(b"\n", 1) + sparse.append(int(number)) + next.offset_data = tarfile.fileobj.tell() + next.sparse = list(zip(sparse[::2], sparse[1::2])) - setattr(self, keyword, value) + def _apply_pax_info(self, pax_headers, encoding, errors): + """Replace fields with supplemental information from a previous + pax extended or global header. 
+ """ + for keyword, value in pax_headers.items(): + if keyword == "GNU.sparse.name": + setattr(self, "path", value) + elif keyword == "GNU.sparse.size": + setattr(self, "size", int(value)) + elif keyword == "GNU.sparse.realsize": + setattr(self, "size", int(value)) + elif keyword in PAX_FIELDS: + if keyword in PAX_NUMBER_FIELDS: + try: + value = PAX_NUMBER_FIELDS[keyword](value) + except ValueError: + value = 0 + if keyword == "path": + value = value.rstrip("/") + setattr(self, keyword, value) self.pax_headers = pax_headers.copy() + def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors): + """Decode a single field from a pax record. + """ + try: + return value.decode(encoding, "strict") + except UnicodeDecodeError: + return value.decode(fallback_encoding, fallback_errors) + def _block(self, count): """Round up a byte count by BLOCKSIZE and return it, e.g. _block(834) => 1024. @@ -1742,7 +1663,7 @@ class TarInfo(object): def isfifo(self): return self.type == FIFOTYPE def issparse(self): - return self.type == GNUTYPE_SPARSE + return self.sparse is not None def isdev(self): return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) def ismultivol(self): @@ -1783,7 +1704,7 @@ class TarFile(object): tarinfo = TarInfo # The default TarInfo class to use. - fileobject = ExFileObject # The default ExFileObject class to use. + fileobject = ExFileObject # The file-object for extractfile(). concat_compression = False # Used to separate in different zip members each # file, used for robustness. 
@@ -1796,8 +1717,8 @@ class TarFile(object): def __init__(self, name=None, mode="r", fileobj=None, format=None, tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, - errors=None, pax_headers=None, debug=None, errorlevel=None, - max_volume_size=None, new_volume_handler=None, + errors="surrogateescape", pax_headers=None, debug=None, + errorlevel=None, max_volume_size=None, new_volume_handler=None, concat_compression=False, password='', save_to_members=True): """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to read from an existing archive, 'a' to append data to an existing @@ -1825,9 +1746,10 @@ class TarFile(object): if name is None and hasattr(fileobj, "name"): name = fileobj.name # when fileobj is a gzip.GzipFile, fileobj.mode is an int (not valid for us) - if hasattr(fileobj, "mode") and isinstance(fileobj.mode, basestring): + if hasattr(fileobj, "mode") and isinstance(fileobj.mode, str): self._mode = fileobj.mode self._extfileobj = True + self.name = os.path.abspath(name) if name else None self.base_name = self.name = os.path.abspath(name) if name else None self.fileobj = fileobj @@ -1843,12 +1765,7 @@ class TarFile(object): if encoding is not None: self.encoding = encoding - if errors is not None: - self.errors = errors - elif mode == "r": - self.errors = "utf-8" - else: - self.errors = "strict" + self.errors = errors if pax_headers is not None and self.format == PAX_FORMAT: self.pax_headers = pax_headers @@ -1893,7 +1810,7 @@ class TarFile(object): except EOFHeaderError: self.fileobj.seek(self.offset) break - except HeaderError, e: + except HeaderError as e: raise ReadError(str(e)) if self.mode in "aw": @@ -1909,18 +1826,6 @@ class TarFile(object): self.closed = True raise - def _getposix(self): - return self.format == USTAR_FORMAT - def _setposix(self, value): - import warnings - warnings.warn("use the format attribute instead", DeprecationWarning, - 2) - if value: - self.format = USTAR_FORMAT - else: - self.format = GNU_FORMAT 
- posix = property(_getposix, _setposix) - #-------------------------------------------------------------------------- # Below are the classmethods which act as alternate constructors to the # TarFile class. The open() method is the only one that is needed for @@ -1942,18 +1847,22 @@ class TarFile(object): 'r:' open for reading exclusively uncompressed 'r:gz' open for reading with gzip compression 'r:bz2' open for reading with bzip2 compression + 'r:xz' open for reading with lzma compression 'a' or 'a:' open for appending, creating the file if necessary 'w' or 'w:' open for writing without compression 'w:gz' open for writing with gzip compression 'w:bz2' open for writing with bzip2 compression + 'w:xz' open for writing with lzma compression 'r|*' open a stream of tar blocks with transparent compression 'r|' open an uncompressed stream of tar blocks for reading 'r|gz' open a gzip compressed stream of tar blocks 'r|bz2' open a bzip2 compressed stream of tar blocks + 'r|xz' open an lzma compressed stream of tar blocks 'w|' open an uncompressed stream for writing 'w|gz' open a gzip compressed stream for writing 'w|bz2' open a bzip2 compressed stream for writing + 'w|xz' open an lzma compressed stream for writing 'r#gz' open a stream of gzip compressed tar blocks for reading 'w#gz' open a stream of gzip compressed tar blocks for writing @@ -1978,7 +1887,7 @@ class TarFile(object): saved_pos = fileobj.tell() try: return func(name, "r", fileobj, **kwargs) - except (ReadError, CompressionError): + except (ReadError, CompressionError) as e: if fileobj is not None: fileobj.seek(saved_pos) continue @@ -2048,11 +1957,14 @@ class TarFile(object): kwargs['concat_compression'] = True - t = cls(name, filemode, - _Stream(name, filemode, comptype, fileobj, bufsize, - concat_stream=True, enctype=enctype, - password=password, key_length=key_length), - **kwargs) + stream = _Stream(name, filemode, comptype, fileobj, bufsize, + concat_stream=True, enctype=enctype, + password=password, 
key_length=key_length) + try: + t = cls(name, filemode, stream, **kwargs) + except: + stream.close() + raise t._extfileobj = False return t @@ -2083,16 +1995,21 @@ class TarFile(object): except (ImportError, AttributeError): raise CompressionError("gzip module is not available") - if fileobj is None: - fileobj = bltn_open(name, mode + "b") - + extfileobj = fileobj is not None try: - t = cls.taropen(name, mode, - gzip.GzipFile(name, mode, compresslevel, fileobj), - **kwargs) - except IOError: + fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj) + t = cls.taropen(name, mode, fileobj, **kwargs) + except OSError: + if not extfileobj and fileobj is not None: + fileobj.close() + if fileobj is None: + raise raise ReadError("not a gzip file") - t._extfileobj = False + except: + if not extfileobj and fileobj is not None: + fileobj.close() + raise + t._extfileobj = extfileobj return t @classmethod @@ -2108,23 +2025,46 @@ class TarFile(object): except ImportError: raise CompressionError("bz2 module is not available") - if fileobj is not None: - fileobj = _BZ2Proxy(fileobj, mode) - else: - fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) + fileobj = bz2.BZ2File(fileobj or name, mode, + compresslevel=compresslevel) try: t = cls.taropen(name, mode, fileobj, **kwargs) - except (IOError, EOFError): + except (OSError, EOFError): + fileobj.close() raise ReadError("not a bzip2 file") t._extfileobj = False return t + @classmethod + def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs): + """Open lzma compressed tar archive name for reading or writing. + Appending is not allowed. 
+ """ + if mode not in ("r", "w"): + raise ValueError("mode must be 'r' or 'w'") + + try: + import lzma + except ImportError: + raise CompressionError("lzma module is not available") + + fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset) + + try: + t = cls.taropen(name, mode, fileobj, **kwargs) + except (lzma.LZMAError, EOFError): + fileobj.close() + raise ReadError("not an lzma file") + t._extfileobj = False + return t + # All *open() methods are registered here. OPEN_METH = { "tar": "taropen", # uncompressed tar "gz": "gzopen", # gzip compressed tar - "bz2": "bz2open" # bzip2 compressed tar + "bz2": "bz2open", # bzip2 compressed tar + "xz": "xzopen" # lzma compressed tar } #-------------------------------------------------------------------------- @@ -2204,8 +2144,7 @@ class TarFile(object): if arcname is None: arcname = name drv, arcname = os.path.splitdrive(arcname) - if os.sep != "/": - arcname = arcname.replace(os.sep, "/") + arcname = arcname.replace(os.sep, "/") arcname = arcname.lstrip("/") # Now, fill the TarInfo object with @@ -2262,7 +2201,7 @@ class TarFile(object): if type == REGTYPE: tarinfo.size = statres.st_size else: - tarinfo.size = 0L + tarinfo.size = 0 tarinfo.mtime = statres.st_mtime tarinfo.type = type tarinfo.linkname = linkname @@ -2292,27 +2231,27 @@ class TarFile(object): for tarinfo in self: if verbose: - print filemode(tarinfo.mode), - print "%s/%s" % (tarinfo.uname or tarinfo.uid, - tarinfo.gname or tarinfo.gid), + print(stat.filemode(tarinfo.mode), end=' ') + print("%s/%s" % (tarinfo.uname or tarinfo.uid, + tarinfo.gname or tarinfo.gid), end=' ') if tarinfo.ischr() or tarinfo.isblk(): - print "%10s" % ("%d,%d" \ - % (tarinfo.devmajor, tarinfo.devminor)), + print("%10s" % ("%d,%d" \ + % (tarinfo.devmajor, tarinfo.devminor)), end=' ') else: - print "%10d" % tarinfo.size, - print "%d-%02d-%02d %02d:%02d:%02d" \ - % time.localtime(tarinfo.mtime)[:6], + print("%10d" % tarinfo.size, end=' ') + print("%d-%02d-%02d %02d:%02d:%02d" 
\ + % time.localtime(tarinfo.mtime)[:6], end=' ') - print tarinfo.name + ("/" if tarinfo.isdir() else ""), + print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ') if verbose: if tarinfo.issym(): - print "->", tarinfo.linkname, + print("->", tarinfo.linkname, end=' ') if tarinfo.islnk(): - print "link to", tarinfo.linkname, - print + print("link to", tarinfo.linkname, end=' ') + print() - def add(self, name, arcname=None, recursive=True, exclude=None, filter=None): + def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None): """Add the file `name' to the archive. `name' may be any type of file (directory, fifo, symbolic link, etc.). If given, `arcname' specifies an alternative name for the file in the archive. @@ -2368,18 +2307,18 @@ class TarFile(object): if recursive: for f in os.listdir(name): self.add(os.path.join(name, f), os.path.join(arcname, f), - recursive, exclude, filter) + recursive, exclude, filter=filter) else: self.addfile(tarinfo) def _size_left(self): - ''' - Calculates size left, assumes self.max_volume_size is set - ''' + """Calculates size left in a volume with a maximum volume size. + Assumes self.max_volume_size is set. 
+ """
        size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset

        # limit size left to a discrete number of blocks, because we won't
-        # write only half a block when writing the end of a volume
+        # write only half a block when writing the end of a volume
        # and filling with zeros
        blocks, remainder = divmod(size_left, BLOCKSIZE)
        return blocks*BLOCKSIZE
@@ -2538,7 +2477,7 @@ class TarFile(object):
            except EOFHeaderError:
                self.fileobj.seek(self.offset)
                break
-            except HeaderError, e:
+            except HeaderError as e:
                raise ReadError(str(e))

        if self.mode in "aw":
@@ -2546,9 +2485,9 @@ class TarFile(object):

        if self.format == PAX_FORMAT:
            volume_info = {
-                "GNU.volume.filename": unicode(self.volume_tarinfo.name),
-                "GNU.volume.size": unicode(self.volume_tarinfo.size - self.volume_tarinfo.volume_offset),
-                "GNU.volume.offset": unicode(self.volume_tarinfo.volume_offset),
+                "GNU.volume.filename": str(self.volume_tarinfo.name),
+                "GNU.volume.size": str(self.volume_tarinfo.size - self.volume_tarinfo.volume_offset),
+                "GNU.volume.offset": str(self.volume_tarinfo.volume_offset),
            }

            self.pax_headers.update(volume_info)
@@ -2585,11 +2524,12 @@ class TarFile(object):
                # Extract directories with a safe mode.
                directories.append(tarinfo)
                tarinfo = copy.copy(tarinfo)
-                tarinfo.mode = 0700
-                self.extract(tarinfo, path)
+                tarinfo.mode = 0o0700
+                # Do not set_attrs directories, as we will do that further down
+                self.extract(tarinfo, path, set_attrs=not tarinfo.isdir())

        # Reverse sort directories.
-        directories.sort(key=operator.attrgetter('name'))
+        directories.sort(key=lambda a: a.name)
        directories.reverse()

        # Set correct owner, mtime and filemode on directories.
@@ -2599,21 +2539,22 @@ class TarFile(object): self.chown(tarinfo, dirpath) self.utime(tarinfo, dirpath) self.chmod(tarinfo, dirpath) - except ExtractError, e: + except ExtractError as e: if self.errorlevel > 1: raise else: self._dbg(1, "tarfile: %s" % e) - def extract(self, member, path=""): + def extract(self, member, path="", set_attrs=True): """Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. `member' may be a filename or a TarInfo object. You can - specify a different directory using `path'. + specify a different directory using `path'. File attributes (owner, + mtime, mode) are set unless `set_attrs' is False. """ self._check("r") - if isinstance(member, basestring): + if isinstance(member, str): tarinfo = self.getmember(member) else: tarinfo = member @@ -2623,8 +2564,9 @@ class TarFile(object): tarinfo._link_target = os.path.join(path, tarinfo.linkname) try: - self._extract_member(tarinfo, os.path.join(path, tarinfo.name)) - except EnvironmentError, e: + self._extract_member(tarinfo, os.path.join(path, tarinfo.name), + set_attrs=set_attrs) + except EnvironmentError as e: if self.errorlevel > 0: raise else: @@ -2632,7 +2574,7 @@ class TarFile(object): self._dbg(1, "tarfile: %s" % e.strerror) else: self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) - except ExtractError, e: + except ExtractError as e: if self.errorlevel > 1: raise else: @@ -2640,24 +2582,19 @@ class TarFile(object): def extractfile(self, member): """Extract a member from the archive as a file object. `member' may be - a filename or a TarInfo object. If `member' is a regular file, a - file-like object is returned. If `member' is a link, a file-like - object is constructed from the link's target. If `member' is none of - the above, None is returned. 
- The file-like object is read-only and provides the following - methods: read(), readline(), readlines(), seek() and tell() + a filename or a TarInfo object. If `member' is a regular file or a + link, an io.BufferedReader object is returned. Otherwise, None is + returned. """ self._check("r") - if isinstance(member, basestring): + if isinstance(member, str): tarinfo = self.getmember(member) else: tarinfo = member - if tarinfo.isreg() or tarinfo.ismultivol(): - return self.fileobject(self, tarinfo) - - elif tarinfo.type not in SUPPORTED_TYPES: + if tarinfo.isreg() or tarinfo.ismultivol() or\ + tarinfo.type not in SUPPORTED_TYPES: # If a member's type is unknown, it is treated as a # regular file. return self.fileobject(self, tarinfo) @@ -2676,7 +2613,7 @@ class TarFile(object): # blkdev, etc.), return None instead of a file object. return None - def _extract_member(self, tarinfo, targetpath): + def _extract_member(self, tarinfo, targetpath, set_attrs=True): """Extract the TarInfo object tarinfo to a physical file called targetpath. """ @@ -2713,10 +2650,11 @@ class TarFile(object): else: self.makefile(tarinfo, targetpath) - self.chown(tarinfo, targetpath) - if not tarinfo.issym(): - self.chmod(tarinfo, targetpath) - self.utime(tarinfo, targetpath) + if set_attrs: + self.chown(tarinfo, targetpath) + if not tarinfo.issym(): + self.chmod(tarinfo, targetpath) + self.utime(tarinfo, targetpath) #-------------------------------------------------------------------------- # Below are the different file methods. They are called via @@ -2729,18 +2667,29 @@ class TarFile(object): try: # Use a safe mode for the directory, the real mode is set # later in _extract_member(). - os.mkdir(targetpath, 0700) - except EnvironmentError, e: - if e.errno != errno.EEXIST: - raise + os.mkdir(targetpath, 0o0700) + except FileExistsError: + pass def makefile(self, tarinfo, targetpath): """Make a file called targetpath. 
""" - source = self.extractfile(tarinfo) + source = self.fileobj + source.seek(tarinfo.offset_data) iterate = True target = bltn_open(targetpath, "wb") + if tarinfo.sparse is not None: + try: + for offset, size in tarinfo.sparse: + target.seek(offset) + copyfileobj(source, target, size) + target.seek(tarinfo.size) + target.truncate() + finally: + target.close() + return + while iterate: iterate = False try: @@ -2758,8 +2707,8 @@ class TarFile(object): # to self.open_volume self.volume_number += 1 self.new_volume_handler(self, self.base_name, self.volume_number) - tarinfo = self.next() - source = self.extractfile(tarinfo) + tarinfo = self.firstmember + source = self.fileobj iterate = True target.close() @@ -2801,23 +2750,21 @@ class TarFile(object): (platform limitation), we try to make a copy of the referenced file instead of a link. """ - if hasattr(os, "symlink") and hasattr(os, "link"): + try: # For systems that support symbolic and hard links. if tarinfo.issym(): - if os.path.lexists(targetpath): - os.unlink(targetpath) os.symlink(tarinfo.linkname, targetpath) else: # See extract(). 
if os.path.exists(tarinfo._link_target): - if os.path.lexists(targetpath): - os.unlink(targetpath) os.link(tarinfo._link_target, targetpath) else: - self._extract_member(self._find_link_target(tarinfo), targetpath) - else: + self._extract_member(self._find_link_target(tarinfo), + targetpath) + except symlink_exception: try: - self._extract_member(self._find_link_target(tarinfo), targetpath) + self._extract_member(self._find_link_target(tarinfo), + targetpath) except KeyError: raise ExtractError("unable to resolve link inside archive") @@ -2838,9 +2785,8 @@ class TarFile(object): if tarinfo.issym() and hasattr(os, "lchown"): os.lchown(targetpath, u, g) else: - if sys.platform != "os2emx": - os.chown(targetpath, u, g) - except EnvironmentError: + os.chown(targetpath, u, g) + except OSError as e: raise ExtractError("could not change owner") def chmod(self, tarinfo, targetpath): @@ -2849,7 +2795,7 @@ class TarFile(object): if hasattr(os, 'chmod'): try: os.chmod(targetpath, tarinfo.mode) - except EnvironmentError: + except OSError as e: raise ExtractError("could not change mode") def utime(self, tarinfo, targetpath): @@ -2859,7 +2805,7 @@ class TarFile(object): return try: os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) - except EnvironmentError: + except OSError as e: raise ExtractError("could not change modification time") #-------------------------------------------------------------------------- @@ -2874,21 +2820,18 @@ class TarFile(object): self.firstmember = None return m - # Read the next block, unless there's none - if isinstance(self.fileobj, _Stream) and self.offset < self.fileobj.pos: - return None - else: - self.fileobj.seek(self.offset) + # Read the next block. 
+ self.fileobj.seek(self.offset) tarinfo = None while True: try: tarinfo = self.tarinfo.fromtarfile(self) - except EOFHeaderError, e: + except EOFHeaderError as e: if self.ignore_zeros: self._dbg(2, "0x%X: %s" % (self.offset, e)) self.offset += BLOCKSIZE continue - except InvalidHeaderError, e: + except InvalidHeaderError as e: if self.ignore_zeros: self._dbg(2, "0x%X: %s" % (self.offset, e)) self.offset += BLOCKSIZE @@ -2898,10 +2841,10 @@ class TarFile(object): except EmptyHeaderError: if self.offset == 0: raise ReadError("empty file") - except TruncatedHeaderError, e: + except TruncatedHeaderError as e: if self.offset == 0: raise ReadError(str(e)) - except SubsequentHeaderError, e: + except SubsequentHeaderError as e: raise ReadError(str(e)) break @@ -2954,9 +2897,9 @@ class TarFile(object): corresponds to TarFile's mode. """ if self.closed: - raise IOError("%s is closed" % self.__class__.__name__) + raise OSError("%s is closed" % self.__class__.__name__) if mode is not None and self.mode not in mode: - raise IOError("bad operation for mode %r" % self.mode) + raise OSError("bad operation for mode %r" % self.mode) def _find_link_target(self, tarinfo): """Find the target member of a symlink or hardlink member in the @@ -2989,7 +2932,7 @@ class TarFile(object): """Write debugging output to sys.stderr. """ if level <= self.debug: - print >> sys.stderr, msg + print(msg, file=sys.stderr) def __enter__(self): self._check() @@ -3022,7 +2965,7 @@ class TarIter: """Return iterator object. """ return self - def next(self): + def __next__(self): """Return the next item using TarFile's next() method. When all members have been read, set TarFile as _loaded. """ @@ -3045,102 +2988,6 @@ class TarIter: return tarinfo -# Helper classes for sparse file support -class _section: - """Base class for _data and _hole. 
- """ - def __init__(self, offset, size): - self.offset = offset - self.size = size - def __contains__(self, offset): - return self.offset <= offset < self.offset + self.size - -class _data(_section): - """Represent a data section in a sparse file. - """ - def __init__(self, offset, size, realpos): - _section.__init__(self, offset, size) - self.realpos = realpos - -class _hole(_section): - """Represent a hole section in a sparse file. - """ - pass - -class _ringbuffer(list): - """Ringbuffer class which increases performance - over a regular list. - """ - def __init__(self): - self.idx = 0 - def find(self, offset): - idx = self.idx - while True: - item = self[idx] - if offset in item: - break - idx += 1 - if idx == len(self): - idx = 0 - if idx == self.idx: - # End of File - return None - self.idx = idx - return item - -#--------------------------------------------- -# zipfile compatible TarFile class -#--------------------------------------------- -TAR_PLAIN = 0 # zipfile.ZIP_STORED -TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED -class TarFileCompat: - """TarFile class compatible with standard module zipfile's - ZipFile class. 
- """ - def __init__(self, file, mode="r", compression=TAR_PLAIN): - from warnings import warnpy3k - warnpy3k("the TarFileCompat class has been removed in Python 3.0", - stacklevel=2) - if compression == TAR_PLAIN: - self.tarfile = TarFile.taropen(file, mode) - elif compression == TAR_GZIPPED: - self.tarfile = TarFile.gzopen(file, mode) - else: - raise ValueError("unknown compression constant") - if mode[0:1] == "r": - members = self.tarfile.getmembers() - for m in members: - m.filename = m.name - m.file_size = m.size - m.date_time = time.gmtime(m.mtime)[:6] - def namelist(self): - return map(lambda m: m.name, self.infolist()) - def infolist(self): - return filter(lambda m: m.type in REGULAR_TYPES, - self.tarfile.getmembers()) - def printdir(self): - self.tarfile.list() - def testzip(self): - return - def getinfo(self, name): - return self.tarfile.getmember(name) - def read(self, name): - return self.tarfile.extractfile(self.tarfile.getmember(name)).read() - def write(self, filename, arcname=None, compress_type=None): - self.tarfile.add(filename, arcname) - def writestr(self, zinfo, bytes): - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - import calendar - tinfo = TarInfo(zinfo.filename) - tinfo.size = len(bytes) - tinfo.mtime = calendar.timegm(zinfo.date_time) - self.tarfile.addfile(tinfo, StringIO(bytes)) - def close(self): - self.tarfile.close() -#class TarFileCompat #-------------------- # exported functions diff --git a/filesplit.py b/filesplit.py index 2bddd2a..1aa5c9a 100644 --- a/filesplit.py +++ b/filesplit.py @@ -28,19 +28,19 @@ def split_file(separator, prefix, input_file, new_file_func=None): ''' i = 0 pos = 0 - buf = "" + buf = b"" sep_len = len(separator) if sep_len == 0: raise Exception("empty separator") if new_file_func is None: - new_file_func = lambda prefix, i: open(prefix + str(i), 'w') + new_file_func = lambda prefix, i: open(prefix + str(i), 'wb') output = new_file_func(prefix, i) # buffered 
search. we try not to have the while input file in memory, as # it's not needed - with open(input_file, 'r') as f: + with open(input_file, 'rb') as f: while True: buf += f.read(BUFSIZE) if len(buf) == 0: @@ -71,13 +71,13 @@ def split_file(separator, prefix, input_file, new_file_func=None): # else: continue writing to the current output and iterate output.write(buf) - buf = "" + buf = b"" output.close() def chunk_file(input_file, output_file, from_pos, to_pos): - ifl = open(input_file, 'r') - ofl = open(output_file, 'w') + ifl = open(input_file, 'rb') + ofl = open(output_file, 'wb') ifl.seek(from_pos) ofl.write(ifl.read(to_pos-from_pos)) @@ -99,4 +99,5 @@ if __name__ == "__main__": chunk_file(input_file=args.input_file, output_file=args.output, from_pos=args.from_pos, to_pos=args.to_pos) else: - split_file(separator=args.separator, prefix=args.prefix, input_file=args.input_file) + split_file(separator=bytes(args.separator, 'UTF-8'), prefix=args.prefix, + input_file=args.input_file) diff --git a/rescue_tar.py b/rescue_tar.py index 9931f60..906fc27 100644 --- a/rescue_tar.py +++ b/rescue_tar.py @@ -31,14 +31,14 @@ def rescue(tar_files, rescue_dir=None): format (compression, etc). Assumes it to be multivolume tar. 
''' # setup rescue_dir - if isinstance(tar_files, basestring): + if isinstance(tar_files, str): tar_files = [tar_files] if not isinstance(tar_files, list): raise Exception("tar_files must be a list") for f in tar_files: - if not isinstance(f, basestring): + if not isinstance(f, str): raise Exception("tar_files must be a list of strings") if not os.path.exists(f): raise Exception("tar file '%s' doesn't exist" % f) @@ -71,12 +71,12 @@ def rescue(tar_files, rescue_dir=None): path = "%s.%d" %(prefix, context['num']) extract_files.append(path) context['num'] += 1 - return open(path, 'w') + return open(path, 'wb') new_gz = partial(new_gz, context, extract_files) # split in compressed chunks for f in tar_files: - filesplit.split_file('\x1f\x8b', + filesplit.split_file(b'\x1f\x8b', os.path.join(rescue_dir, base_name), f, new_gz) # includes volumes already extracted with new_volume_handler diff --git a/runtests.py b/runtests.py old mode 100644 new mode 100755 index 0df4650..0d7f99c --- a/runtests.py +++ b/runtests.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright (C) 2013 Intra2net AG # diff --git a/testing/__init__.py b/testing/__init__.py index cd86323..dd27771 100644 --- a/testing/__init__.py +++ b/testing/__init__.py @@ -42,9 +42,9 @@ class BaseTest(unittest.TestCase): file. File path and length are specified as function arguments. 
''' f = open(path, 'w') - s = string.lowercase + string.digits + "\n" + s = string.ascii_lowercase + string.digits + "\n" if len(s) < length: - s += s*(length/len(s)) + s += s*int(length/len(s)) data = s[:length] f.write(data) f.close() diff --git a/testing/test_concat_compress.py b/testing/test_concat_compress.py index b2e1238..a594884 100644 --- a/testing/test_concat_compress.py +++ b/testing/test_concat_compress.py @@ -93,7 +93,7 @@ class ConcatCompressTest(BaseTest): tarobj.close() os.unlink("big") - fo = open("sample.tar.gz", 'r') + fo = open("sample.tar.gz", 'rb') fo.seek(pos) tarobj = TarFile.open(mode="r#gz", fileobj=fo) tarobj.extract(tarobj.next()) @@ -130,7 +130,7 @@ class ConcatCompressTest(BaseTest): os.unlink("small2") # extract only the "small" file - fo = open("sample.tar.gz", 'r') + fo = open("sample.tar.gz", 'rb') fo.seek(pos) tarobj = TarFile.open(mode="r#gz", fileobj=fo) tarobj.extract(tarobj.next()) @@ -179,7 +179,7 @@ class ConcatCompressTest(BaseTest): tarobj.open_volume(volume_path) # extract only the "small" file - fo = open("sample.tar.gz", 'r') + fo = open("sample.tar.gz", 'rb') fo.seek(pos) tarobj = TarFile.open(mode="r#gz", fileobj=fo, concat_compression=True, @@ -226,7 +226,7 @@ class ConcatCompressTest(BaseTest): tarobj.extractall() tarobj.close() - for key, value in hash.iteritems(): + for key, value in hash.items(): assert os.path.exists(key) assert value == self.md5sum(key) @@ -263,7 +263,7 @@ class ConcatCompressTest(BaseTest): tarobj.extractall() tarobj.close() - for key, value in hash.iteritems(): + for key, value in hash.items(): assert os.path.exists(key) assert value == self.md5sum(key) @@ -305,7 +305,7 @@ class ConcatCompressTest(BaseTest): tarobj.close() # check output - for key, value in hash.iteritems(): + for key, value in hash.items(): assert os.path.exists(key) assert value == self.md5sum(key) @@ -337,7 +337,7 @@ class ConcatCompressTest(BaseTest): os.unlink("small") os.unlink("small2") - 
filesplit.split_file('\x1f\x8b', "sample.tar.gz.", "sample.tar.gz")
+        filesplit.split_file(b'\x1f\x8b', "sample.tar.gz.", "sample.tar.gz")

        assert os.path.exists("sample.tar.gz.0") # beginning of the tar file
        assert os.path.exists("sample.tar.gz.1") # first file
@@ -346,13 +346,13 @@
        assert not os.path.exists("sample.tar.gz.4") # nothing else

        # extract and check output
-        for i in xrange(1, 4):
+        for i in range(1, 4):
            tarobj = TarFile.open("sample.tar.gz.%d" % i,
                mode="r|gz")
            tarobj.extractall()
            tarobj.close()

-        for key, value in hash.iteritems():
+        for key, value in hash.items():
            assert os.path.exists(key)
            assert value == self.md5sum(key)
@@ -395,11 +395,11 @@
        assert not os.path.exists("sample.tar.gz.4") # nothing else

        # extract and check output
-        for i in xrange(1, 4):
+        for i in range(1, 4):
            os.system("gzip -cd sample.tar.gz.%d > sample.%d.tar" % (i, i))
            os.system("tar xf sample.%d.tar" % i)

-        for key, value in hash.iteritems():
+        for key, value in hash.items():
            assert os.path.exists(key)
            assert value == self.md5sum(key)
@@ -429,9 +429,9 @@
        assert os.path.exists("sample.tar.gz")

        # overwrite stuff in the middle of the big file
-        f = open('sample.tar.gz', 'r+b')
+        f = open('sample.tar.gz', 'r+b')
        f.seek(100)
-        f.write("breaking things")
+        f.write(bytes("breaking things", 'UTF-8'))
        f.close()

        os.unlink("big")
@@ -439,7 +439,7 @@
        os.unlink("small2")

        # equivalent to $ python filesplit.py -s $'\x1f\x8b' -p sample.tar.gz.
sample.tar.gz - filesplit.split_file('\x1f\x8b', "sample.tar.gz.", "sample.tar.gz") + filesplit.split_file(b'\x1f\x8b', "sample.tar.gz.", "sample.tar.gz") assert os.path.exists("sample.tar.gz.0") # beginning of the tar file assert os.path.exists("sample.tar.gz.1") # first file @@ -448,7 +448,7 @@ class ConcatCompressTest(BaseTest): assert not os.path.exists("sample.tar.gz.4") # nothing else # extract and check output - for i in xrange(1, 4): + for i in range(1, 4): try: tarobj = TarFile.open("sample.tar.gz.%d" % i, mode="r|gz") @@ -460,7 +460,7 @@ class ConcatCompressTest(BaseTest): else: raise Exception("Error extracting a tar.gz not related to the broken 'big' file") - for key, value in hash.iteritems(): + for key, value in hash.items(): if key != "big": assert os.path.exists(key) assert value == self.md5sum(key) diff --git a/testing/test_deltatar.py b/testing/test_deltatar.py index b5c6802..7c6e73f 100644 --- a/testing/test_deltatar.py +++ b/testing/test_deltatar.py @@ -87,7 +87,7 @@ class DeltaTarTest(BaseTest): deltatar.restore_backup(target_path="source_dir", backup_tar_path=tar_path) - for key, value in self.hash.iteritems(): + for key, value in self.hash.items(): assert os.path.exists(key) if value: assert value == self.md5sum(key) @@ -108,19 +108,22 @@ class DeltaTarTest(BaseTest): index_filename = deltatar.index_name_func(True) index_path = os.path.join("backup_dir", index_filename) - f = open(index_path, 'r') + f = open(index_path, 'rb') crc = None checked = False began_list = False - for l in f.readline(): - if 'BEGIN-FILE-LIST' in l: + while True: + l = f.readline() + if l == b'': + break + if b'BEGIN-FILE-LIST' in l: crc = binascii.crc32(l) began_list = True - elif 'END-FILE-LIST' in l: + elif b'END-FILE-LIST' in l: crc = binascii.crc32(l, crc) & 0xffffffff # next line contains the crc - data = json.loads(f.readline()) + data = json.loads(f.readline().decode("UTF-8")) assert data['type'] == 'file-list-checksum' assert data['checksum'] == crc checked = 
True @@ -164,7 +167,7 @@ class DeltaTarTest(BaseTest): deltatar.restore_backup(target_path="source_dir2", backup_tar_path=tar_path) - for key, value in self.hash.iteritems(): + for key, value in self.hash.items(): assert os.path.exists(key) if value: assert value == self.md5sum(key) @@ -244,13 +247,13 @@ class DeltaTarTest(BaseTest): l = f.readline() if not len(l): break - data = json.loads(l) + data = json.loads(l.decode('UTF-8')) if data.get('type', '') == 'file' and\ deltatar.unprefixed(data['path']) == "./huge": offset = data['offset'] break - fo = open(tar_path, 'r') + fo = open(tar_path, 'rb') fo.seek(offset) def new_volume_handler(mode, tarobj, base_name, volume_number): tarobj.open_volume(datetime.now().strftime( @@ -296,7 +299,7 @@ class DeltaTarTest(BaseTest): deltatar.restore_backup(target_path="source_dir", backup_indexes_paths=[index_path]) - for key, value in self.hash.iteritems(): + for key, value in self.hash.items(): assert os.path.exists(key) if value: assert value == self.md5sum(key) @@ -327,7 +330,7 @@ class DeltaTarTest(BaseTest): deltatar.restore_backup(target_path="source_dir", backup_indexes_paths=[index_path]) - for key, value in self.hash.iteritems(): + for key, value in self.hash.items(): assert os.path.exists(key) if value: assert value == self.md5sum(key) @@ -967,7 +970,7 @@ class DeltaTarTest(BaseTest): # create source_dir with the small file, that will be then deleted by # the restore_backup os.mkdir("source_dir") - open("source_dir/small", 'w').close() + open("source_dir/small", 'wb').close() tar_filename = deltatar.volume_name_func('backup_dir2', is_full=False, volume_number=0) @@ -1221,19 +1224,19 @@ class DeltaTarTest(BaseTest): target_it = deltatar.jsonize_path_iterator(target_it, strip=1) while True: try: - sitem = source_it.next() - titem = target_it.next() + sitem = next(source_it) + titem = next(target_it) except StopIteration: try: - titem = target_it.next() + titem = next(target_it) raise Exception("iterators do not stop at 
the same time") except StopIteration: break try: assert deltatar._equal_stat_dicts(sitem, titem) - except Exception, e: - print sitem - print titem + except Exception as e: + print(sitem) + print(titem) raise e class DeltaTar2Test(DeltaTarTest): diff --git a/testing/test_encryption.py b/testing/test_encryption.py index 03b597a..432acdf 100644 --- a/testing/test_encryption.py +++ b/testing/test_encryption.py @@ -50,7 +50,7 @@ class EncryptionTest(BaseTest): os.unlink("big") # extract with normal tar and check output - filesplit.split_file('Salted__', "sample.tar.gz.aes.", "sample.tar.gz.aes128") + filesplit.split_file(b'Salted__', "sample.tar.gz.aes.", "sample.tar.gz.aes128") assert os.path.exists("sample.tar.gz.aes.0") # beginning of the tar file assert os.path.exists("sample.tar.gz.aes.1") # first file @@ -89,7 +89,7 @@ class EncryptionTest(BaseTest): os.unlink(k) # extract with normal tar and check output - filesplit.split_file('Salted__', "sample.tar.gz.aes.", "sample.tar.gz.aes128") + filesplit.split_file(b'Salted__', "sample.tar.gz.aes.", "sample.tar.gz.aes128") assert os.path.exists("sample.tar.gz.aes.0") # beginning of the tar file assert os.path.exists("sample.tar.gz.aes.1") # first file @@ -98,13 +98,13 @@ class EncryptionTest(BaseTest): assert not os.path.exists("sample.tar.gz.aes.4") # nothing else # extract and check output - for i in xrange(1, 4): + for i in range(1, 4): fname = "sample.tar.gz.aes.%d" % i os.system("openssl aes-128-cbc -nopad -k 'key' -d -in %s -out sample.tar.gz" % fname) os.system("zcat sample.tar.gz 2>/dev/null > sample.tar") os.system("tar xf sample.tar") - for key, value in hash.iteritems(): + for key, value in hash.items(): assert os.path.exists(key) assert value == self.md5sum(key) @@ -170,7 +170,7 @@ class EncryptionTest(BaseTest): tarobj.extractall() tarobj.close() - for key, value in hash.iteritems(): + for key, value in hash.items(): assert os.path.exists(key) assert value == self.md5sum(key) @@ -211,7 +211,7 @@ class 
EncryptionTest(BaseTest): tarobj.close() # check output - for key, value in hash.iteritems(): + for key, value in hash.items(): assert os.path.exists(key) assert value == self.md5sum(key) @@ -252,7 +252,7 @@ class EncryptionTest(BaseTest): tarobj.close() # check output - for key, value in hash.iteritems(): + for key, value in hash.items(): assert os.path.exists(key) assert value == self.md5sum(key) @@ -279,7 +279,7 @@ class EncryptionTest(BaseTest): os.unlink("big") # extract with normal tar and check output - filesplit.split_file('Salted__', "sample.tar.gz.aes.", "sample.tar.gz.aes256") + filesplit.split_file(b'Salted__', "sample.tar.gz.aes.", "sample.tar.gz.aes256") assert os.path.exists("sample.tar.gz.aes.0") # beginning of the tar file assert os.path.exists("sample.tar.gz.aes.1") # first file diff --git a/testing/test_multivol.py b/testing/test_multivol.py index b08656d..0389139 100644 --- a/testing/test_multivol.py +++ b/testing/test_multivol.py @@ -164,7 +164,7 @@ class MultivolGnuFormatTest(BaseTest): # extract with normal tar and check output os.system("tar xfM sample.tar --file=sample.tar.1 --file=sample.tar.2") - for key, value in hash.iteritems(): + for key, value in hash.items(): assert os.path.exists(key) assert value == self.md5sum(key) @@ -250,7 +250,7 @@ class MultivolGnuFormatTest(BaseTest): assert os.path.exists("sample.tar.1") assert not os.path.exists("sample.tar.2") - for key, value in hash.iteritems(): + for key, value in hash.items(): os.unlink(key) assert not os.path.exists(key) @@ -261,7 +261,7 @@ class MultivolGnuFormatTest(BaseTest): tarobj.extractall() tarobj.close() - for key, value in hash.iteritems(): + for key, value in hash.items(): assert os.path.exists(key) assert value == self.md5sum(key) @@ -304,7 +304,7 @@ class MultivolGnuFormatTest(BaseTest): tarobj.extractall() tarobj.close() - for key, value in hash.iteritems(): + for key, value in hash.items(): assert os.path.exists(key) assert value == self.md5sum(key) @@ -344,7 +344,7 @@ 
class MultivolGnuFormatTest(BaseTest): tarobj.extractall() tarobj.close() - for key, value in hash.iteritems(): + for key, value in hash.items(): assert os.path.exists(key) assert value == self.md5sum(key) @@ -474,7 +474,7 @@ class MultivolGnuFormatTest(BaseTest): assert os.path.exists("sample.tar") assert not os.path.exists("sample.tar.1") - for key, value in hash.iteritems(): + for key, value in hash.items(): os.unlink(key) assert not os.path.exists(key) @@ -485,7 +485,7 @@ class MultivolGnuFormatTest(BaseTest): tarobj.extractall() tarobj.close() - for key, value in hash.iteritems(): + for key, value in hash.items(): assert os.path.exists(key) assert value == self.md5sum(key) @@ -515,7 +515,7 @@ class MultivolGnuFormatTest(BaseTest): assert os.path.exists("sample.tar.1") assert not os.path.exists("sample.tar.2") - for key, value in hash.iteritems(): + for key, value in hash.items(): os.unlink(key) assert not os.path.exists(key) @@ -526,7 +526,7 @@ class MultivolGnuFormatTest(BaseTest): tarobj.extractall() tarobj.close() - for key, value in hash.iteritems(): + for key, value in hash.items(): assert os.path.exists(key) assert value == self.md5sum(key) diff --git a/testing/test_rescue_tar.py b/testing/test_rescue_tar.py index e9c48eb..cb0764f 100644 --- a/testing/test_rescue_tar.py +++ b/testing/test_rescue_tar.py @@ -61,7 +61,7 @@ class RescueTarTest(BaseTest): rescue_tar.rescue("sample.tar.gz") # check output - for key, value in hash.iteritems(): + for key, value in hash.items(): assert os.path.exists(key) assert value == self.md5sum(key) @@ -95,7 +95,7 @@ class RescueTarTest(BaseTest): # overwrite stuff in the middle of the big file f = open('sample.tar.gz', 'r+b') f.seek(100) - f.write("breaking things") + f.write(bytes("breaking things", "UTF-8")) f.close() os.unlink("big") @@ -107,7 +107,7 @@ class RescueTarTest(BaseTest): rescue_tar.rescue("sample.tar.gz") # check output - for key, value in hash.iteritems(): + for key, value in hash.items(): if key == "big": 
continue assert os.path.exists(key) -- 1.7.1