From 283153fb1645ba452aaed32b7c7847fbbce35e5a Mon Sep 17 00:00:00 2001 From: Christian Herdtweck Date: Thu, 21 Jul 2016 18:15:12 +0200 Subject: [PATCH] continue testing of volume split with compressed archives; may have found another TarFile bug --- testing/test_volume_split.py | 125 ++++++++++++++++++++++++++++++----------- 1 files changed, 91 insertions(+), 34 deletions(-) diff --git a/testing/test_volume_split.py b/testing/test_volume_split.py index 9cd2e27..85d394a 100755 --- a/testing/test_volume_split.py +++ b/testing/test_volume_split.py @@ -19,6 +19,7 @@ More precisely: """ import os +from os.path import dirname, abspath, basename import sys import random from math import log2 @@ -26,7 +27,6 @@ from hashlib import md5 as hash_type from tempfile import TemporaryDirectory, NamedTemporaryFile from time import time from traceback import format_exc -from os.path import dirname, abspath, basename # try to import the tarfile from source, not the globally installed one source_base = dirname(dirname(abspath(__file__))) @@ -50,6 +50,9 @@ BIG_SIZE = (MAX_VOLUME_BLOCKS-3) * BLOCKSIZE #: max size of small files SMALL_MAX_SIZE = 2 * BLOCKSIZE +#: max small files to add +SMALL_MAX_NUMBER = 200 + #: number of bits used for seeding SEED_BITS = int(log2(sys.maxsize+1)) @@ -123,17 +126,26 @@ def hash_file(file_name): return hash_obj.hexdigest() -def do_test(seed, tar_mode, temp_dir, print_everything=False): +def do_test(seed, create_mode, extract_mode, temp_dir, print_everything=False): """ a single test run; returns True if everything went ok """ # output is not printed but remembered and only printed in the end # if necessary output = [] - dprnt = print + if print_everything: + print('-' * 72) + prefix = '{:9d}: '.format(seed) + dprnt = lambda val: print(prefix + val) + else: + dprnt = output.append + dprnt('-' * 72) everything_ok = False + # record params + dprnt('using seed {}, mode {} for create and {} for extract' + .format(seed, create_mode, extract_mode)) + # seed random number generator - dprnt('using seed {}'.format(seed)) random.seed(seed) # remember number of files in temp dir @@ -142,25 +154,33 @@ def do_test(seed, tar_mode, temp_dir, print_everything=False): # create tar archive temp_file = None try: - everything_ok = True - temp_file = NamedTemporaryFile(dir=temp_dir, suffix='.' + tar_mode[2:], + temp_file = NamedTemporaryFile(dir=temp_dir, + suffix='.' + create_mode[2:], delete=False, mode='wb') + + # preparations + everything_ok = True files = {} + if print_everything: + tar_debug = 3 + else: + tar_debug = None # define local volume handler so can read/write volume_handler_called volume_handler_called = False - offset_end_vol0 = None def new_volume_handler(tarobj, base_name, volume_number): """ called from tarobj when creating a new volume """ nonlocal volume_handler_called - nonlocal offset_end_vol0 volume_handler_called = True - offset_end_vol0 = tarobj.offset - volume_path = "%s.%d" % (base_name, volume_number) + volume_path = "%s.%d" % (temp_file.name, volume_number) + dprnt('in volume handler, at offset {}: open volume {}' + .format(tarobj.offset, volume_path)) tarobj.open_volume(volume_path) + dprnt('in volume handler, after open_volume: offset is {}' + .format(tarobj.offset)) dprnt('creating archive {}'.format(temp_file.name)) - with TarFile.open(mode=tar_mode, fileobj=temp_file, + with TarFile.open(mode=create_mode, fileobj=temp_file, debug=tar_debug, max_volume_size=MAX_VOLUME_BLOCKS * BLOCKSIZE, new_volume_handler=new_volume_handler) as tarobj: @@ -180,6 +200,12 @@ def do_test(seed, tar_mode, temp_dir, print_everything=False): # loop while not volume_handler_called: + if len(files) > SMALL_MAX_NUMBER: + everything_ok = False + dprnt('reached max number {} of files in archive' + .format(len(files))) + break + # add small file small_size = random.randint(0, SMALL_MAX_SIZE) small_name, small_hash, file_info = create_file(small_size, @@ -200,8 +226,8 @@ def do_test(seed, tar_mode, temp_dir, print_everything=False): temp_file.close() # remember size of first volume (2nd should always be RECORDSIZE) - dprnt('size of first volume file: {}; offset at vol change: {}' - .format(os.stat(temp_file.name).st_size, offset_end_vol0)) + dprnt('size of first volume file: {}' + .format(os.stat(temp_file.name).st_size)) if os.stat(temp_file.name + ".1").st_size != RECORDSIZE: everything_ok = False dprnt('strange size of 2nd volume: {}' @@ -213,7 +239,8 @@ def do_test(seed, tar_mode, temp_dir, print_everything=False): os.unlink(file_name) # extract - with TarFile.open(mode='r' + tar_mode[1:], name=temp_file.name, + with TarFile.open(mode=extract_mode, name=temp_file.name, + debug=tar_debug, new_volume_handler=new_volume_handler) as tarobj: tarobj.extractall(path=temp_dir) @@ -252,10 +279,12 @@ def do_test(seed, tar_mode, temp_dir, print_everything=False): except FileNotFoundError: pass - if print_everything or not everything_ok: + if (not print_everything) and (not everything_ok): prefix = '{:9d}: '.format(seed) for line in output: print(prefix + line) + elif print_everything and everything_ok: + dprnt('ended successfully') return everything_ok @@ -278,16 +307,17 @@ def test_forever(): # more params fast_fail = True print_everything = True - modes = 'w:tar', 'w|tar', 'w|gz', 'w|bz2', 'w#tar', \ - 'w#gz', #'w#gz.aes128', 'w#gz.aes256', 'w#aes128', 'w#aes256' - # not currently working: 'w:gz', 'w:bz2', + create_modes = 'w:tar', 'w|tar', 'w|gz', 'w|bz2', 'w#gz' + #'w#gz.aes128', 'w#gz.aes256', 'w#aes128', 'w#aes256' + # not currently working: 'w:gz', 'w:bz2', + extract_mode_starts = 'r:', 'r#', 'r:*' # seed properly random.seed() # preparations n_runs = 0 - error_seeds = [] + error_params = [] do_stop = False # create temp dir @@ -296,18 +326,43 @@ def test_forever(): try: start_time = time() while not do_stop: - for mode in modes: - seed = create_seed() - if not do_test(seed, mode, temp_dir): - error_seeds.append(seed) - if fast_fail: - print('stopping because fast_fail is set') - do_stop = True + for create_mode in create_modes: + if do_stop: + break + for extract_start in extract_mode_starts: + if do_stop: break - n_runs += 1 - if n_runs % 100 == 0: - print('at run {} ({:.3f}s per run)' - .format(n_runs, (time()-start_time)/n_runs)) + + # figure out extract mode for tar file + if ('#' in extract_start) and ('#' not in create_mode): + continue # not possible + full_extract_mode = extract_start + if extract_start[-1] != '*': + full_extract_mode += create_mode[2:] + + # create seed to re-create results + seed = create_seed() + + # run test + n_runs += 1 + everything_ok = \ + do_test(seed, create_mode, full_extract_mode, + temp_dir, + print_everything=print_everything) + + # remember error + if not everything_ok: + error_params.append((seed, create_mode, + full_extract_mode)) + if fast_fail: + print('stopping because fast_fail is set') + do_stop = True + break + + # print some output from time to time + if n_runs % 100 == 0: + print('at run {} ({:.3f}s per run)' + .format(n_runs, (time()-start_time)/n_runs)) except KeyboardInterrupt: print('Stopped by user') for line in format_exc().splitlines(): @@ -315,17 +370,19 @@ def test_forever(): # summarize print('') - print('-'*72) - n_errs = len(error_seeds) + print('='*72) + n_errs = len(error_params) duration = time() - start_time if n_runs == 0: print('summary: no test run has finished') else: print('summary: {} runs, in {}s ({:.3f}s per run); ' - '{} with errs ({:.2f}%)' + '{} with errs ({:.0f}%)' .format(n_runs, duration, duration/n_runs, n_errs, 100.0 * float(n_errs)/float(n_runs))) - print('seeds that created errors: {}'.format(error_seeds)) + print('params that created errors') + for params in error_params: + print(params) if __name__ == '__main__': -- 1.7.1