From f6dbed2eff02baebbf6e83343d7a600ec7a2596e Mon Sep 17 00:00:00 2001
From: Christian Herdtweck
Date: Mon, 18 Jul 2016 13:06:10 +0200
Subject: [PATCH] moved specification of expectations out of test() function;
 deal with RECORDSIZE; nearly done

---
 testing/volume_size_accuracy.py | 302 +++++++++++++++++++++-----------------
 1 files changed, 167 insertions(+), 135 deletions(-)

diff --git a/testing/volume_size_accuracy.py b/testing/volume_size_accuracy.py
index 2c956f9..da5e73f 100755
--- a/testing/volume_size_accuracy.py
+++ b/testing/volume_size_accuracy.py
@@ -3,11 +3,11 @@
 """ Check very accurately the splitting of files into volumes; not a unit test
 
 Check:
-- behaviour for max_volume_sizes % BLOCK_SIZE != 0
+- behaviour for max_volume_sizes % BLOCKSIZE != 0
 - file sizes very close to size remaining in volume
 
 By doing the following:
-- create a multi-volume archive with max_volume_size % BLOCK_SIZE == 0
+- create a multi-volume archive with max_volume_size % BLOCKSIZE == 0
 - add a file that nearly fills the volume
 - add a small file that should just fit in or not
 - check expected number and size of volumes
@@ -17,58 +17,77 @@ Repeat with compressed/encrypted data; for this have to find some random data
 that is repeatable (--> :py:mod:`create_pseudo_random_files`) and experiment
 a bit for suitable seeds and sizes
 
-e.g. for max_volume_size = 8 * BLOCK_SIZE:
-
-block |   0   |   1   |   2   |   3   |   4   |   5   |   6   |   7   |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
-all fit into first volume
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Info1 | Dat10 |   0   |   0   |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
-
-file0 needs next block:
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Info1 | Dat10 | (ends early)
-vol1: |   0   |   0   |       |       |       |       |       |       |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
-
-file0 needs 2 more blocks
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | Info1 | Dat10 |
-vol1: |   0   |   0   |       |       |       |       |       |       |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
-
-file0 needs 3 more blocks
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | (ends early)
-vol1: | Dat05 | Info1 | Dat10 |   0   |   0   |       |       |       |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
-
-file0 regular, file1 needs next block:
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Info1 | Dat10 | (ends early)
-vol1: | Dat11 |   0   |   0   |       |       |       |       |       |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
-
-both need next block:
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Info1 | Dat10 | (ends early)
-vol1: | Dat11 |   0   |   0   |       |       |       |       |       |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
-
-single huge file
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | (ends early)
-vol1: | Dat05 | Dat06 | Dat07 | Dat08 | Dat09 | Dat0A | (ends early)
-vol2: |  ...  |       |       |       |       |       |       |       |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
+e.g. for max_volume_size = RECORDSIZE + 1 * BLOCKSIZE
+
+File 0 has Info0 and blocks Dat00, Dat01, ... Dat0K, (Dat0L, Dat0M, Dat0N)
+File 1 has Info1 and blocks Dat10, (Dat11)
+                                                               end of  end of
+                                                               RECORD  volume
+block |   0   |   1   |   2   |  ...  |  -5   |  -4   |  -3   |  -2   |  -1   |
+      +-------+-------+-------+  ...  +-------+-------+-------+-------+-------+
+0: all fit into first record: blocks = [N_BLOCKS_PER_RECORD-5, 1]
+vol0: | Info0 | Dat00 | Dat01 |  ...  | Info1 | Dat10 |   0   |   0   |       |
+      +-------+-------+-------+  ...  +-------+-------+-------+-------+-------+
+
+1: all fit into first volume: blocks = [MAX_VOLUME_BLOCKS-5, 1]
+vol0: | Info0 | Dat00 | Dat01 |  ...  | Dat0K | Info1 | Dat10 |   0   |   0   |
+      +-------+-------+-------+  ...  +-------+-------+-------+-------+-------+
+
+2: file0 needs next block: blocks = [MAX_VOLUME_BLOCKS-4, 1]
+vol0: | Info0 | Dat00 | Dat01 |  ...  | Dat0K | Dat0L | Info1 | Dat10 |   0   | 0
+      +-------+-------+-------+  ...  +-------+-------+-------+-------+-------+
+
+3: file0 needs 2 more blocks: blocks = [MAX_VOLUME_BLOCKS-3, 1]
+vol0: | Info0 | Dat00 | Dat01 |  ...  | Dat0K | Dat0L | Dat0M | Info1 | Dat10 |00
+      +-------+-------+-------+  ...  +-------+-------+-------+-------+-------+
+
+4: file0 needs 3 more blocks: blocks = [MAX_VOLUME_BLOCKS-2, 1]
+vol0: | Info0 | Dat00 | Dat01 |  ...  | Dat0K | Dat0L | Dat0M | wasted space  |
+vol1: |  VOL  | Dat0N | Info1 | Dat10 | 00
+      +-------+-------+-------+  ...  +-------+-------+-------+-------+-------+
+
+5: file0 regular, file1 needs next block: blocks = [MAX_VOLUME_BLOCKS-5, 2]
+vol0: | Info0 | Dat00 | Dat01 |  ...  | Dat0K | Info1 | Dat10 |       |       |
+vol1: |  VOL  | Dat11 |   0   |   0   |       |       |       |       |       |
+      +-------+-------+-------+  ...  +-------+-------+-------+-------+-------+
+
+6: both need next block: blocks = [MAX_VOLUME_BLOCKS-4, 2]
+vol0: | Info0 | Dat00 | Dat01 |  ...  | Dat0K | Dat0L | Info1 | Dat10 |       |
+vol1: |  VOL  | Dat11 |   0   |   0   |       |       |       |       |       |
+      +-------+-------+-------+  ...  +-------+-------+-------+-------+-------+
+
+single huge file: blocks = [MAX_VOLUME_BLOCKS * 3,]
+vol0: | Info0 | Dat00 | Dat01 |  ...  | Dat02 | Dat03 | Dat04 | wasted space  |
+vol1: |  VOL  | Dat05 | Dat06 | Dat07 |  ...  | Dat08 | Dat09 | wasted space  |
+vol2: |  ...  |       |       |       |       |       |       |       |       |
+      +-------+-------+-------+  ...  +-------+-------+-------+-------+-------+
 
 .. codeauthor:: Intra2net AG
 """
 
 
 import os
+from os.path import dirname, abspath
 import sys
 from math import ceil
 from glob import glob
 from tempfile import NamedTemporaryFile, TemporaryDirectory
-from deltatar.tarfile import TarFile, BLOCKSIZE
+
+# try to import the tarfile from source, not the globally installed one
+source_base = dirname(dirname(abspath(__file__)))
+print('adding {} to python path'.format(source_base))
+if os.path.isdir(source_base):
+    sys.path.insert(0, source_base)
+import inspect
+from deltatar.tarfile import TarFile, BLOCKSIZE, RECORDSIZE
+print('using TarFile from ' + dirname(inspect.getsourcefile(TarFile)))
+
+
+#: number of blocks in a record
+N_BLOCKS_PER_RECORD = RECORDSIZE // BLOCKSIZE
 
 #: number of blocks per tar volume file
-MAX_VOLUME_BLOCKS = 8
+MAX_VOLUME_BLOCKS = N_BLOCKS_PER_RECORD + 1
 
 
 def fill_file(file_handle, data_size):
@@ -84,136 +103,131 @@ def new_volume_handler(tarobj, base_name, volume_number):
     """ called from tarobj when creating a new volume """
-    volume_path = "%s.%d" % (base_name, volume_number)
     tarobj.fileobj.close()
+    volume_path = "%s.%d" % (base_name, volume_number)
+    print('new-volume handler: creating volume {}'.format(volume_path))
     tarobj.open_volume(volume_path)
 
 
-def test(temp_dir, size0, size1, volume_size_offset=0):
+def size_str(size):
+    """ return string 'N (= b BLKS + m)' """
+    return '{} (= {} BLKs + {})'.format(size, *divmod(size, BLOCKSIZE))
+
+
+def test(temp_dir, file_blocks, volume_blocks_arg, offset_blocks,
+         file_size_offsets=(0, 0), volume_size_offset=0):
     """ create TarFile with given configuration """
-    if volume_size_offset < 0 or volume_size_offset > BLOCKSIZE-1:
+    if not (0 <= volume_size_offset < BLOCKSIZE):
         raise ValueError('volume size offset outside allowed interval '
                          '[0, BLOCKSIZE-1]: {}'.format(volume_size_offset))
-    # in this range, volume_size offset should not make a difference in
-    # expectations below
-
-    if size0 < size1:
-        raise ValueError('file0 must be larger than file1!')
-
-    # think about what we expect
-    n_blocks0 = ceil(size0 / BLOCKSIZE)   # number of blocks that file0 ...
-    n_blocks1 = ceil(size1 / BLOCKSIZE)   # ... and file1 will require in tar
-    if n_blocks0 + n_blocks1 < MAX_VOLUME_BLOCKS - 4:   # do not want to test...
-        raise ValueError('overall file sizes too small!')
-    elif n_blocks0 + n_blocks1 > MAX_VOLUME_BLOCKS + 1:   # ...these cases
-        raise ValueError('overall file sizes too big!')
-    elif n_blocks0 + n_blocks1 < MAX_VOLUME_BLOCKS - 3:
-        n_volumes = 1
-    else:
-        n_volumes = 2
-
-    expected_offset0 = 0
-    file0_complete_in_vol0 = n_blocks0 < MAX_VOLUME_BLOCKS - 2
-    # file 0 is completed in volume 0 --> file 1 starts in volume 0
-
-    if file0_complete_in_vol0:
-        expected_offset1 = (n_blocks0 + 1) * BLOCKSIZE
-    else:
-        # offset1 in volume1; 3 blocks in volume0 cannot be used by file0
-        expected_offset1 = ( n_blocks0 - (MAX_VOLUME_BLOCKS - 3) ) * BLOCKSIZE
-
-    # nubmer of blocks written overall: data blocks + 2*info + 2*0-blocks
-    n_blocks_overall = n_blocks0 + n_blocks1 + 2 + 2
+    for idx, size_offset in enumerate(file_size_offsets):
+        if not (0 <= size_offset < BLOCKSIZE):
+            raise ValueError('size offset for file {} outside allowed interval '
+                             '[0, BLOCKSIZE-1]: {}'
+                             .format(idx, size_offset))
+    if len(file_blocks) != len(file_size_offsets):
+        raise ValueError('need same number of file block sizes and offsets!')
 
-    if n_volumes == 0:
-        expected_sizes = [n_blocks_overall * BLOCKSIZE, ]
-    elif file0_complete_in_vol0:
-        expected_sizes = [(n_blocks0 + 2) * BLOCKSIZE,
-                          (n_blocks_overall-n_blocks0-2)*BLOCKSIZE]
-    else:
-        n_blocks_in_vol0 = MAX_VOLUME_BLOCKS-2
-        expected_sizes = [n_blocks_in_vol0 * BLOCKSIZE,
-                          (n_blocks_overall-n_blocks_in_vol0) * BLOCKSIZE]
-
-    # create TarFile with max volume size approx 8 blocks
     max_volume_size = MAX_VOLUME_BLOCKS * BLOCKSIZE + volume_size_offset
 
     actual_sizes = []
+    actual_offsets = []
     volume_files = []
     tar_handle = None
     with NamedTemporaryFile(dir=temp_dir, suffix='.tar',
+                            mode='wb',
                             delete=False) as tar_handle:
 
+        # create TarFile
         print('creating tar {} with max volume size {}'
-              .format(tar_handle.name, max_volume_size))
+              .format(tar_handle.name, size_str(max_volume_size)))
        tarobj = TarFile.open(mode='w:', fileobj=tar_handle,
                               max_volume_size=max_volume_size,
                               new_volume_handler=new_volume_handler)
 
         # add files, remember offsets
-        real_offset0 = tarobj.offset
-
-        # create and add file0
-        add_handle = None
-        with NamedTemporaryFile(dir=temp_dir, delete=False) as add_handle:
-            fill_file(add_handle, size0)
-            add_handle.close()
-            print('adding file of size {0} ({1} = {2[0]} BLOCKS + {2[1]})'
-                  .format(os.stat(add_handle.name).st_size, size0,
-                          divmod(size0, BLOCKSIZE)))
-            tarobj.add(add_handle.name, arcname='file0')
-        if add_handle:
-            os.unlink(add_handle.name)
-        real_offset1 = tarobj.offset
-
-        # create and add file1
-        add_handle = None
-        with NamedTemporaryFile(dir=temp_dir, delete=False) as add_handle:
-            fill_file(add_handle, size1)
-            add_handle.close()
-            print('adding file of size {0} ({1} = {2[0]} BLOCKS + {2[1]})'
-                  .format(os.stat(add_handle.name).st_size, size1,
-                          divmod(size1, BLOCKSIZE)))
-            tarobj.add(add_handle.name, arcname='file1')
-        if add_handle:
-            os.unlink(add_handle.name)
+        for idx, (size_blocks, size_offset) \
+                in enumerate(zip(file_blocks, file_size_offsets)):
+
+            # remember offset
+            actual_offsets.append(tarobj.offset)
+
+            # create file
+            add_handle = None
+            file_size = size_blocks * BLOCKSIZE - size_offset
+            with NamedTemporaryFile(dir=temp_dir, delete=False) as add_handle:
+                fill_file(add_handle, file_size)
+                add_handle.close()
+                print('adding file of size {} at offset {}'
+                      .format(size_str(file_size), size_str(tarobj.offset)))
+
+                # add file
+                tarobj.add(add_handle.name, arcname='file{}'.format(idx))
+
+            # remove file
+            if add_handle:
+                os.unlink(add_handle.name)
+
+        # remember offset where 0s should be added
+        actual_offsets.append(tarobj.offset)
 
         # close tar file
+        print('before close: offset is ' + size_str(actual_offsets[-1]))
         tarobj.close()
+        tar_handle.close()
+        print('after close: offset is ' + size_str(tarobj.offset))
 
     # get volume file sizes
     volume_files = sorted(glob(tar_handle.name + "*"))
     for volume_file in volume_files:
-        actual_sizes.append(os.stat(volume_file).st_size)
+        actual_size = os.stat(volume_file).st_size
         print('found volume {} of size {}'
-              .format(volume_file, actual_sizes[-1]))
+              .format(volume_file, size_str(actual_size)))
+        actual_sizes.append(actual_size)
     for volume_file in volume_files:
         os.unlink(volume_file)
     # now all temp files should be deleted again
 
     # check expectation
-    found_err = False
-    if len(actual_sizes) != n_volumes:
-        found_err = True
+    everything_ok = True
+    if len(actual_offsets) != len(offset_blocks):
+        everything_ok = False
+        print('have {} actual offsets but expected {}!'
+              .format(len(actual_offsets), len(offset_blocks)))
+    for idx, (actual_offset, expected_block) \
+            in enumerate(zip(actual_offsets, offset_blocks)):
+        if actual_offset != expected_block * BLOCKSIZE:
+            everything_ok = False
+            print('wrong offset for file {}: {} != {}'
+                  .format(idx, size_str(actual_offset),
+                          size_str(expected_block*BLOCKSIZE)))
+
+    # last volume is filled up to RECORDSIZE
+    volume_blocks = list(volume_blocks_arg)
+    if (len(actual_sizes) == len(volume_blocks)-1) \
+            and (volume_blocks[-1] == 0):
+        actual_sizes.append(0)
+        volume_blocks[-2] = ceil(volume_blocks[-2] / N_BLOCKS_PER_RECORD) \
+            * N_BLOCKS_PER_RECORD
+    elif len(actual_sizes) == len(volume_blocks) + 1:
+        volume_blocks[-1] = ceil(volume_blocks[-1] / N_BLOCKS_PER_RECORD) \
+            * N_BLOCKS_PER_RECORD
+    else:
+        everything_ok = False
         print('wrong number of volumes: {} != {}'
-              .format(len(actual_sizes), n_volumes))
-    for vol_idx, (actual_size, expected_size) in \
-            enumerate(zip(actual_sizes, expected_sizes)):
-        if actual_size != expected_size:
-            found_err = True
+              .format(len(actual_sizes)-1, len(volume_blocks)))
+
+    for idx, (actual_size, expected_blocks) in \
+            enumerate(zip(actual_sizes, volume_blocks)):
+        if actual_size != expected_blocks * BLOCKSIZE:
+            everything_ok = False
             print('wrong size for volume {}: {} != {}'
-                  .format(vol_idx, actual_size, expected_size))
-    if real_offset0 != expected_offset0:
-        found_err = True
-        print('wrong offset for file0: {} != {}'
-              .format(real_offset0, expected_offset0))
-    if real_offset1 != expected_offset1:
-        found_err = True
-        print('wrong offset for file1: {} != {}'
-              .format(real_offset1, expected_offset1))
+                  .format(idx, size_str(actual_size),
+                          size_str(expected_blocks * BLOCKSIZE)))
 
-    return ~found_err
+    return everything_ok
 
 
 def main():
@@ -222,11 +236,29 @@
 
     see module doc for more info
     """
+    N = N_BLOCKS_PER_RECORD
+    M = MAX_VOLUME_BLOCKS
+
+    # define tests by numbers of blocks:
+    # n_blocks file 0, 1; n_blocks vol0, 1, offset Info0, Info1, 0-blocks
+    tests = (((N-5, 1), (N, 0), (0, N-4, N-2)),
+             ((M-5, 1), (M, 0), (0, M-4, M-2)),
+             ((M-4, 1), (M+1, 0), (0, M-3, M-1)),
+             ((M-3, 1), (M+2, 0), (0, M-2, M)),
+             ((M-2, 1), (M-2, 6), (0, 2, 4)),
+             ((M-5, 2), (M-2, 4), (0, M-4, 2)),
+             ((M-4, 2), (M-1, 4), (0, M-3, 2)))
+
     n_errs = 0
     with TemporaryDirectory() as temp_dir:
-        test_succeeded = test(temp_dir, 3*BLOCKSIZE, 1*BLOCKSIZE)
-        if not test_succeeded:
-            n_errs += 1
+        for size_comb_idx, (file_sizes, vol_sizes, offsets) \
+                in enumerate(tests):
+            print('-' * 72)
+            print('size combination {}: ({}, {})'
+                  .format(size_comb_idx, *file_sizes))
+            test_succeeded = test(temp_dir, file_sizes, vol_sizes, offsets)
+            if not test_succeeded:
+                n_errs += 1
 
     return n_errs
-- 
1.7.1
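
Note (not part of the patch): the test driver above exercises deltatar's multivolume
writing through TarFile.open() with max_volume_size and new_volume_handler, with the
handler closing the filled volume and calling open_volume(). A minimal sketch of that
call pattern, assuming deltatar is importable and using made-up file names, could look
like this:

    # Sketch only: mirrors the TarFile.open() arguments and the new-volume
    # handler pattern from testing/volume_size_accuracy.py; the payload and
    # archive names below are hypothetical examples.
    import os
    from deltatar.tarfile import TarFile, BLOCKSIZE, RECORDSIZE

    def new_volume_handler(tarobj, base_name, volume_number):
        # close the just-filled volume, then continue in "<base_name>.<volume_number>"
        tarobj.fileobj.close()
        tarobj.open_volume("%s.%d" % (base_name, volume_number))

    payload = "payload.bin"                       # hypothetical input file
    with open(payload, "wb") as handle:
        handle.write(b"\0" * (2 * RECORDSIZE))    # larger than one volume

    archive = "example.tar"                       # hypothetical archive name
    with open(archive, "wb") as tar_handle:
        tarobj = TarFile.open(mode="w:", fileobj=tar_handle,
                              # one record plus one block, like MAX_VOLUME_BLOCKS above
                              max_volume_size=RECORDSIZE + BLOCKSIZE,
                              new_volume_handler=new_volume_handler)
        tarobj.add(payload, arcname="file0")
        tarobj.close()

    # continuation volumes created by the handler appear next to the archive,
    # e.g. example.tar.1, example.tar.2, ...
    print(sorted(name for name in os.listdir(".") if name.startswith(archive)))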