--- /dev/null
+#!/usr/bin/env python3
+
+""" Test splitting into volumes with compression and/or encryption
+
+Adds big files to tarfile that almost fill volume, then small files with
+varying size to check robustness of volume split
+
+More precisely:
+- create new tar archive
+- create a big source file, add to archive
+- if needs 2nd volume: done
+- create small source file, add to archive
+- until need 2nd volume: repeat last step
+- extract volumes
+- compare files
- repeat whole procedure with differently-sized big and small source files
+
+.. codeauthor:: Intra2net AG <info@intra2net>
+"""
+
+import os
+import sys
+import random
+from math import log2
+from hashlib import md5 as hash_type
+from tempfile import TemporaryDirectory, NamedTemporaryFile
+from time import time
+from traceback import format_exc
+from os.path import dirname, abspath
+
+# try to import the tarfile from source, not the globally installed one
+source_base = dirname(dirname(abspath(__file__)))
+print('adding {} to python path'.format(source_base))
+if os.path.isdir(source_base):
+ sys.path.insert(0, source_base)
+import inspect
+from deltatar.tarfile import TarFile, BLOCKSIZE, RECORDSIZE, _Stream
+print('using TarFile from ' + dirname(inspect.getsourcefile(TarFile)))
+
+
#: number of blocks in a record
N_BLOCKS_PER_RECORD = RECORDSIZE // BLOCKSIZE

#: number of blocks per tar volume file (one block more than a record)
MAX_VOLUME_BLOCKS = N_BLOCKS_PER_RECORD + 1

#: size of big file -- equals the max_volume_size given to TarFile.open
BIG_SIZE = MAX_VOLUME_BLOCKS * BLOCKSIZE

#: max size of small files; each small file gets a random size in
#: [0, SMALL_MAX_SIZE]
SMALL_MAX_SIZE = 2 * BLOCKSIZE

#: number of bits used for seeding (bit width of sys.maxsize + 1)
SEED_BITS = int(log2(sys.maxsize+1))

#: buffer size for reading file for hashing
HASH_BUF_SIZE = 4096
+
+
def create_file(file_size, temp_dir):
    """ create a file of given size with pseudo-random content in given dir

    Flips a coin to pick :py:func:`fill_file_repetitive` or
    :py:func:`fill_file_random` as the content generator.

    The returned hash is computed over exactly the written bytes and is
    therefore compatible with :py:func:`hash_file`.

    :returns: tuple (path of created file, hex digest of its contents)
    """
    digest = hash_type()
    # one bit decides which generator fills the file
    filler = fill_file_repetitive if random.getrandbits(1) \
        else fill_file_random
    with NamedTemporaryFile(dir=temp_dir, delete=False) as handle:
        filler(handle, file_size, digest)

    return handle.name, digest.hexdigest()
+
+
def fill_file_repetitive(temp_file, file_size, hash_obj):
    """ fills file with repetitive data

    Writes the byte pattern 0..255 in full 256-byte chunks while at least
    256 bytes remain, then a truncated chunk for the remainder, so exactly
    ``file_size`` bytes end up in the file.  Every written byte is also fed
    into ``hash_obj``.

    :param temp_file: binary file object open for writing
    :param int file_size: exact number of bytes to write
    :param hash_obj: hashlib-style object updated with all written data
    """
    bytes_written = 0
    data = bytes(range(256))
    # BUGFIX: condition was inverted (`< 256`), which skipped the loop for
    # big files (writing at most 256 bytes) and never terminated for small
    # ones; write full chunks while they still fit
    while file_size - bytes_written >= 256:
        temp_file.write(data)
        hash_obj.update(data)
        bytes_written += 256

    # partial chunk for the remaining bytes, if any
    if file_size - bytes_written > 0:
        temp_file.write(data[:file_size-bytes_written])
        hash_obj.update(data[:file_size-bytes_written])
+
+
def fill_file_random(temp_file, file_size, hash_obj):
    """ fills file with randomized data

    Re-shuffles a 256-byte alphabet before every chunk and writes only 255
    of its 256 bytes, so two files of the same size differ in content with
    high probability.  Exactly ``file_size`` bytes are written and every
    written byte is also fed into ``hash_obj``.

    :param temp_file: binary file object open for writing
    :param int file_size: exact number of bytes to write
    :param hash_obj: hashlib-style object updated with all written data
    """
    bytes_written = 0
    data = bytearray(range(256))
    # BUGFIX: condition was inverted (`< 255`), which skipped the loop for
    # big files (writing at most 256 bytes) and never terminated for small
    # ones; write 255-byte chunks while they still fit
    while file_size - bytes_written >= 255:
        random.shuffle(data)
        temp_file.write(data[:-1])  # write all but last to make a difference
        hash_obj.update(data[:-1])  # between files of same size
        bytes_written += 255

    # partial chunk for the remaining bytes, if any
    if file_size - bytes_written > 0:
        random.shuffle(data)
        temp_file.write(data[:file_size-bytes_written])
        hash_obj.update(data[:file_size-bytes_written])
+
+
def hash_file(file_name):
    """ calculate hash of file contents

    Reads the file in chunks of :py:data:`HASH_BUF_SIZE` bytes so even big
    files need little memory.

    :returns: hex digest string of the file's contents
    """
    digest = hash_type()
    with open(file_name, 'rb') as reader:
        # iter() with a b'' sentinel yields chunks until read() is exhausted
        for chunk in iter(lambda: reader.read(HASH_BUF_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()
+
+
def do_test(seed, tar_mode, temp_dir, print_everything=False):
    """ a single test run; returns True if everything went ok

    Creates a multi-volume tar archive in temp_dir from randomly generated
    files, extracts the volumes again and compares file contents by hash.

    :param int seed: seed for module :py:mod:`random`; logged so failing
                     runs can be reproduced
    :param str tar_mode: write mode for :py:meth:`TarFile.open`, e.g. 'w:gz'
    :param str temp_dir: directory for archive volumes and source files
    :param bool print_everything: dump the log even when the run succeeded
    :returns: True if creation, extraction and comparison all succeeded
    """

    # output is not printed but remembered and only printed in the end
    # if necessary
    output = []
    # BUGFIX: was `dprnt = print`, which printed immediately and left
    # `output` empty, so the failure log at the end showed nothing
    dprnt = output.append
    everything_ok = False

    # seed random number generator
    dprnt('using seed {}'.format(seed))
    random.seed(seed)

    # remember number of files in temp dir
    n_files_at_start = len(os.listdir(temp_dir))

    # create tar archive
    temp_file = None
    try:
        everything_ok = True
        temp_file = NamedTemporaryFile(dir=temp_dir, suffix='.' + tar_mode[2:],
                                       delete=False, mode='wb')
        files = {}   # maps file name -> hash of contents

        # define local volume handler so can read/write volume_handler_called
        volume_handler_called = False
        def new_volume_handler(tarobj, base_name, volume_number):
            """ called from tarobj when creating a new volume """
            nonlocal volume_handler_called
            volume_handler_called = True
            volume_path = "%s.%d" % (base_name, volume_number)
            tarobj.open_volume(volume_path)

        dprnt('creating archive {}'.format(temp_file.name))
        with TarFile.open(mode=tar_mode, fileobj=temp_file,
                          max_volume_size=MAX_VOLUME_BLOCKS * BLOCKSIZE,
                          new_volume_handler=new_volume_handler) as tarobj:

            # pick size estimator matching the underlying file object type
            if isinstance(tarobj.fileobj, _Stream):
                size_left_func = tarobj._size_left_stream
            else:
                size_left_func = tarobj._size_left_file

            # add big file
            big_name, big_hash = create_file(BIG_SIZE, temp_dir)
            files[big_name] = big_hash
            dprnt('adding big file {} of size {} with hash {}'
                  .format(big_name, BIG_SIZE, big_hash))
            tarobj.add(big_name)
            dprnt('now offset={}, size_left={}'
                  .format(tarobj.offset, size_left_func()))

            # add small files until the volume split is triggered
            while not volume_handler_called:
                small_size = random.randint(0, SMALL_MAX_SIZE)
                small_name, small_hash = create_file(small_size, temp_dir)
                files[small_name] = small_hash
                dprnt('adding small file {} of size {} with hash {}'
                      .format(small_name, small_size, small_hash))
                tarobj.add(small_name)
                dprnt('now offset={}, size_left={}'
                      .format(tarobj.offset, size_left_func()))

        # close tarobj -- happens in __exit__ of TarFile context
        dprnt('closing tar file')
        # now tarobj should be closed

        # remember size of first volume (2nd should always be RECORDSIZE)
        dprnt('size of first volume file: {}'
              .format(os.stat(temp_file.name).st_size))
        if os.stat(temp_file.name + ".1").st_size != RECORDSIZE:
            everything_ok = False
            dprnt('strange size of 2nd volume: {}'
                  .format(os.stat(temp_file.name + ".1").st_size))

        # delete added files
        dprnt('deleting {} original files'.format(len(files)))
        for file_name in files:
            os.unlink(file_name)

        # extract
        with TarFile.open(mode='r' + tar_mode[1:], name=temp_file.name,
                          new_volume_handler=new_volume_handler) as tarobj:
            tarobj.extractall(path=temp_dir)

        # compare files; expect the originals plus the 2 volume files
        # BUGFIX: os.listdir() was called without argument here and below,
        # listing the current working dir instead of temp_dir
        if len(os.listdir(temp_dir)) != len(files)+2 + n_files_at_start:
            everything_ok = False
            dprnt('wrong number of files: found {} but expect {}!'
                  .format(len(os.listdir(temp_dir)),
                          len(files)+2+n_files_at_start))
            for file_name in os.listdir(temp_dir):
                dprnt('listdir: {}'.format(file_name))

        # BUGFIX: iterating the dict directly yields only keys and fails to
        # unpack; need items() for (name, hash) pairs
        for file_name, file_hash in files.items():
            if not os.path.exists(file_name):
                everything_ok = False
                dprnt('failed to find file {} after extraction'
                      .format(file_name))
                continue   # hashing a missing file would raise
            if hash_file(file_name) != file_hash:
                everything_ok = False
                dprnt('wrong hash for file {} after extraction: {} != {}'
                      .format(file_name, hash_file(file_name), file_hash))

    except Exception:
        everything_ok = False
        dprnt('caught exception:')
        for line in format_exc().splitlines():
            dprnt('exc: {}'.format(line))
    finally:
        # close file and delete it (volumes may not all exist on failure)
        if temp_file:
            temp_file.close()
            try:
                os.unlink(temp_file.name)
                os.unlink(temp_file.name + ".1")
            except FileNotFoundError:
                pass

    # dump the collected log when asked to or when something went wrong
    if print_everything or not everything_ok:
        prefix = '{:9d}: '.format(seed)
        for line in output:
            print(prefix + line)

    return everything_ok
+
+
def create_seed():
    """ create a seed for seeding module :py:mod:`random`

    uses :py:func:`random.getrandbits`, so the result is also dependent on
    the current state of module :py:mod:`random`

    :returns: a non-negative int with :py:data:`SEED_BITS` random bits
    """
    return random.getrandbits(SEED_BITS)
+
+
def test_forever():
    """ Main function, called when running file as script

    runs do_test in an infinite loop, cycling through all write modes,
    until stopped by the user (Ctrl-C) or -- with fast_fail -- by the
    first failing run; prints a summary and all failing seeds at the end
    """

    # more params
    fast_fail = True           # stop at the first failing run
    print_everything = False   # dump do_test's log even on success
    # write modes to cycle through; the AES-encrypted '#'-modes at the end
    # are disabled (commented out)
    modes = 'w:tar', 'w:gz', 'w:bz2', 'w|tar', 'w|gz', 'w|bz2', 'w#tar', \
            'w#gz', #'w#gz.aes128', 'w#gz.aes256', 'w#aes128', 'w#aes256'

    # seed properly
    random.seed()

    # preparations
    n_runs = 0
    error_seeds = []
    do_stop = False

    # create temp dir
    with TemporaryDirectory(prefix='deltatar_test_') as temp_dir:

        try:
            start_time = time()
            while not do_stop:
                for mode in modes:
                    # fresh seed per run so failures can be reproduced
                    seed = create_seed()
                    # BUGFIX: print_everything was defined above but never
                    # forwarded to do_test
                    if not do_test(seed, mode, temp_dir, print_everything):
                        error_seeds.append(seed)
                        if fast_fail:
                            print('stopping because fast_fail is set')
                            do_stop = True
                            break
                    n_runs += 1
                    # progress report every 100 runs
                    if n_runs % 100 == 0:
                        print('at run {} ({:.3f}s per run)'
                              .format(n_runs, (time()-start_time)/n_runs))
        except KeyboardInterrupt:
            print('Stopped by user')

        # summarize
        print('')
        print('-'*72)
        n_errs = len(error_seeds)
        duration = time() - start_time
        if n_runs == 0:
            print('summary: no test run has finished')
        else:
            print('summary: {} runs, in {}s ({:.3f}s per run); '
                  '{} with errs ({:.2f}%)'
                  .format(n_runs, duration, duration/n_runs, n_errs,
                          100.0 * float(n_errs)/float(n_runs)))
        print('seeds that created errors: {}'.format(error_seeds))
+
+
# run the endless volume-split test loop when executed as a script
if __name__ == '__main__':
    test_forever()