"""
import os
+from os.path import dirname, abspath, basename
import sys
import random
from math import log2
from tempfile import TemporaryDirectory, NamedTemporaryFile
from time import time
from traceback import format_exc
-from os.path import dirname, abspath, basename
# try to import the tarfile from source, not the globally installed one
source_base = dirname(dirname(abspath(__file__)))
#: max size of small files
SMALL_MAX_SIZE = 2 * BLOCKSIZE
+#: max number of small files to add
+SMALL_MAX_NUMBER = 200
+
#: number of bits used for seeding
SEED_BITS = int(log2(sys.maxsize+1))
return hash_obj.hexdigest()
-def do_test(seed, tar_mode, temp_dir, print_everything=False):
+def do_test(seed, create_mode, extract_mode, temp_dir, print_everything=False):
""" a single test run; returns True if everything went ok """
# output is not printed but remembered and only printed in the end
# if necessary
output = []
- dprnt = print
+ if print_everything:
+ print('-' * 72)
+ prefix = '{:9d}: '.format(seed)
+ dprnt = lambda val: print(prefix + val)
+ else:
+ dprnt = output.append
+ dprnt('-' * 72)
everything_ok = False
+ # record params
+ dprnt('using seed {}, mode {} for create and {} for extract'
+ .format(seed, create_mode, extract_mode))
+
# seed random number generator
- dprnt('using seed {}'.format(seed))
random.seed(seed)
# remember number of files in temp dir
# create tar archive
temp_file = None
try:
- everything_ok = True
- temp_file = NamedTemporaryFile(dir=temp_dir, suffix='.' + tar_mode[2:],
+ temp_file = NamedTemporaryFile(dir=temp_dir,
+ suffix='.' + create_mode[2:],
delete=False, mode='wb')
+
+ # preparations
+ everything_ok = True
files = {}
+ if print_everything:
+ tar_debug = 3
+ else:
+ tar_debug = None
# define local volume handler so can read/write volume_handler_called
volume_handler_called = False
- offset_end_vol0 = None
def new_volume_handler(tarobj, base_name, volume_number):
""" called from tarobj when creating a new volume """
nonlocal volume_handler_called
- nonlocal offset_end_vol0
volume_handler_called = True
- offset_end_vol0 = tarobj.offset
- volume_path = "%s.%d" % (base_name, volume_number)
+ volume_path = "%s.%d" % (temp_file.name, volume_number)
+ dprnt('in volume handler, at offset {}: open volume {}'
+ .format(tarobj.offset, volume_path))
tarobj.open_volume(volume_path)
+ dprnt('in volume handler, after open_volume: offset is {}'
+ .format(tarobj.offset))
dprnt('creating archive {}'.format(temp_file.name))
- with TarFile.open(mode=tar_mode, fileobj=temp_file,
+ with TarFile.open(mode=create_mode, fileobj=temp_file, debug=tar_debug,
max_volume_size=MAX_VOLUME_BLOCKS * BLOCKSIZE,
new_volume_handler=new_volume_handler) as tarobj:
# loop
while not volume_handler_called:
+ if len(files) > SMALL_MAX_NUMBER:
+ everything_ok = False
+ dprnt('reached max number {} of files in archive'
+ .format(len(files)))
+ break
+
# add small file
small_size = random.randint(0, SMALL_MAX_SIZE)
small_name, small_hash, file_info = create_file(small_size,
temp_file.close()
# remember size of first volume (2nd should always be RECORDSIZE)
- dprnt('size of first volume file: {}; offset at vol change: {}'
- .format(os.stat(temp_file.name).st_size, offset_end_vol0))
+ dprnt('size of first volume file: {}'
+ .format(os.stat(temp_file.name).st_size))
if os.stat(temp_file.name + ".1").st_size != RECORDSIZE:
everything_ok = False
dprnt('strange size of 2nd volume: {}'
os.unlink(file_name)
# extract
- with TarFile.open(mode='r' + tar_mode[1:], name=temp_file.name,
+ with TarFile.open(mode=extract_mode, name=temp_file.name,
+ debug=tar_debug,
new_volume_handler=new_volume_handler) as tarobj:
tarobj.extractall(path=temp_dir)
except FileNotFoundError:
pass
- if print_everything or not everything_ok:
+ if (not print_everything) and (not everything_ok):
prefix = '{:9d}: '.format(seed)
for line in output:
print(prefix + line)
+ elif print_everything and everything_ok:
+ dprnt('ended successfully')
return everything_ok
# more params
fast_fail = True
print_everything = True
- modes = 'w:tar', 'w|tar', 'w|gz', 'w|bz2', 'w#tar', \
- 'w#gz', #'w#gz.aes128', 'w#gz.aes256', 'w#aes128', 'w#aes256'
- # not currently working: 'w:gz', 'w:bz2',
+ create_modes = 'w:tar', 'w|tar', 'w|gz', 'w|bz2', 'w#gz'
+ #'w#gz.aes128', 'w#gz.aes256', 'w#aes128', 'w#aes256'
+ # not currently working: 'w:gz', 'w:bz2',
+ extract_mode_starts = 'r:', 'r#', 'r:*'
# seed properly
random.seed()
# preparations
n_runs = 0
- error_seeds = []
+ error_params = []
do_stop = False
# create temp dir
try:
start_time = time()
while not do_stop:
- for mode in modes:
- seed = create_seed()
- if not do_test(seed, mode, temp_dir):
- error_seeds.append(seed)
- if fast_fail:
- print('stopping because fast_fail is set')
- do_stop = True
+ for create_mode in create_modes:
+ if do_stop:
+ break
+ for extract_start in extract_mode_starts:
+ if do_stop:
break
- n_runs += 1
- if n_runs % 100 == 0:
- print('at run {} ({:.3f}s per run)'
- .format(n_runs, (time()-start_time)/n_runs))
+
+ # figure out extract mode for tar file
+ if ('#' in extract_start) and ('#' not in create_mode):
+ continue # not possible
+ full_extract_mode = extract_start
+ if extract_start[-1] != '*':
+ full_extract_mode += create_mode[2:]
+
+ # create seed to re-create results
+ seed = create_seed()
+
+ # run test
+ n_runs += 1
+ everything_ok = \
+ do_test(seed, create_mode, full_extract_mode,
+ temp_dir,
+ print_everything=print_everything)
+
+ # remember error
+ if not everything_ok:
+ error_params.append((seed, create_mode,
+ full_extract_mode))
+ if fast_fail:
+ print('stopping because fast_fail is set')
+ do_stop = True
+ break
+
+ # print some output from time to time
+ if n_runs % 100 == 0:
+ print('at run {} ({:.3f}s per run)'
+ .format(n_runs, (time()-start_time)/n_runs))
except KeyboardInterrupt:
print('Stopped by user')
for line in format_exc().splitlines():
# summarize
print('')
- print('-'*72)
- n_errs = len(error_seeds)
+ print('='*72)
+ n_errs = len(error_params)
duration = time() - start_time
if n_runs == 0:
print('summary: no test run has finished')
else:
print('summary: {} runs, in {}s ({:.3f}s per run); '
- '{} with errs ({:.2f}%)'
+ '{} with errs ({:.0f}%)'
.format(n_runs, duration, duration/n_runs, n_errs,
100.0 * float(n_errs)/float(n_runs)))
- print('seeds that created errors: {}'.format(error_seeds))
+ print('params that created errors')
+ for params in error_params:
+ print(params)
if __name__ == '__main__':