From 45169d4ed2be8a47db5c4f6d6598f44c17b5bd97 Mon Sep 17 00:00:00 2001
From: Christian Herdtweck
Date: Fri, 15 Jul 2016 12:43:16 +0200
Subject: [PATCH] implemented volume size accuracy test before realizing RECORDSIZE

---
 testing/volume_size_accuracy.py |  240 +++++++++++++++++++++++++++++++++++----
 1 files changed, 220 insertions(+), 20 deletions(-)
 mode change 100644 => 100755 testing/volume_size_accuracy.py

diff --git a/testing/volume_size_accuracy.py b/testing/volume_size_accuracy.py
old mode 100644
new mode 100755
index 2e8e5a4..2c956f9
--- a/testing/volume_size_accuracy.py
+++ b/testing/volume_size_accuracy.py
@@ -12,35 +12,228 @@ By doing the following:
 - add a small file that should just fit in or not
 - check expected number and size of volumes
 - repeat with max_volume_size +1, -1, +2, -2, +10, -10
-    
+
 Repeat with compressed/encrypted data; for this have to find some random data
 that is repeatable (--> :py:mod:`create_pseudo_random_files`) and experiment a
 bit for suitable seeds and sizes
 
 e.g. for max_volume_size = 8 * BLOCK_SIZE:
 
-block | 0      | 1     | 2     | 3     | 4     | 5     | 6     | 7     | (8!)
-------+--------+-------+-------+-------+-------+-------+-------+-------+------
-file0 fits into 5 blocks:
-vol0: | Info0  | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | Info1 | Dat10 |
-vol1: | Dat11  |       |       |       |       |       |       |       |
-
-OR: file0 needs 6th block --> force volume0 too big:
-vol0: | Info0  | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | Dat05 | Info1 | Dat10
-vol1: | Dat11  |       |       |       |       |       |       |       |
-
-OR: all fit into first volume
-vol0: | Info0  | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | Info1 | Dat10 |
-
-NOT: Info block in the end
-vol0: | Info0  | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | Dat05 | Info1 |
-vol1: | Dat10  | Dat11 |       |       |       |       |       |       |
+block | 0     | 1     | 2     | 3     | 4     | 5     | 6     | 7     |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
+all fit into first volume
+vol0: | Info0 | Dat00 | Dat01 | Dat02 | Info1 | Dat10 |   0   |   0   |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
+
+file0 needs next block:
+vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Info1 | Dat10 | (ends early)
+vol1: |   0   |   0   |       |       |       |       |       |       |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
+
+file0 needs 2 more blocks
+vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | Info1 | Dat10 |
+vol1: |   0   |   0   |       |       |       |       |       |       |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
+
+file0 needs 3 more blocks
+vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | (ends early)
+vol1: | Dat05 | Info1 | Dat10 |   0   |   0   |       |       |       |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
+
+file0 regular, file1 needs next block:
+vol0: | Info0 | Dat00 | Dat01 | Dat02 | Info1 | Dat10 | (ends early)
+vol1: | Dat11 |   0   |   0   |       |       |       |       |       |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
+
+both need next block:
+vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Info1 | Dat10 | (ends early)
+vol1: | Dat11 |   0   |   0   |       |       |       |       |       |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
+
+single huge file
+vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | (ends early)
+vol1: | Dat05 | Dat06 | Dat07 | Dat08 | Dat09 | Dat0A | (ends early)
+vol2: |  ...  |       |       |       |       |       |       |       |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
 
 .. codeauthor:: Intra2net AG
 """
 
 
-from deltatar.tarfile import TarFile
+import os
+import sys
+from math import ceil
+from glob import glob
+from tempfile import NamedTemporaryFile, TemporaryDirectory
+from deltatar.tarfile import TarFile, BLOCKSIZE
+
+#: number of blocks per tar volume file
+MAX_VOLUME_BLOCKS = 8
+
+
+def fill_file(file_handle, data_size):
+    """ fill given file handle with nonsense data of given size
+
+    Writes the 256 byte values 0..255 over and over, the last repetition
+    truncated, so `data_size` bytes are written; needs a binary file handle.
+    """
+    temp_data = bytes(range(2**8))
+    temp_size = len(temp_data)
+    n_written = 0
+    while n_written + temp_size <= data_size:
+        file_handle.write(temp_data)
+        n_written += temp_size
+    file_handle.write(temp_data[:data_size-n_written])
+
+
+def new_volume_handler(tarobj, base_name, volume_number):
+    """ called from tarobj when creating a new volume
+
+    Closes the current volume file, opens "<base_name>.<volume_number>" next.
+    """
+    volume_path = "%s.%d" % (base_name, volume_number)
+    tarobj.fileobj.close()
+    tarobj.open_volume(volume_path)
+
+
+def test(temp_dir, size0, size1, volume_size_offset=0):
+    """ create TarFile with given configuration and check the result
+
+    Adds two temporary files of `size0` and `size1` bytes to a multi-volume
+    tar created in `temp_dir`, then compares number and sizes of the volume
+    files and the offsets of both files with the expectation computed here.
+
+    :param str temp_dir: directory in which all temporary files are created
+    :param int size0: size of file0 in bytes, must not be smaller than size1
+    :param int size1: size of file1 in bytes
+    :param int volume_size_offset: added to the max volume size of
+        MAX_VOLUME_BLOCKS * BLOCKSIZE; must be within [0, BLOCKSIZE-1]
+    :returns: True if all checks succeeded, False otherwise
+    :raises ValueError: if an argument is outside the range covered here
+    """
+
+    if volume_size_offset < 0 or volume_size_offset > BLOCKSIZE-1:
+        raise ValueError('volume size offset outside allowed interval '
+                         '[0, BLOCKSIZE-1]: {}'.format(volume_size_offset))
+    # in this range, volume_size offset should not make a difference in
+    # expectations below
+    if size0 < size1:
+        raise ValueError('file0 must be larger than file1!')
+
+    # think about what we expect
+    n_blocks0 = ceil(size0 / BLOCKSIZE)   # number of blocks that file0 ...
+    n_blocks1 = ceil(size1 / BLOCKSIZE)   # ... and file1 will require in tar
+    if n_blocks0 + n_blocks1 < MAX_VOLUME_BLOCKS - 4:   # do not want to test...
+        raise ValueError('overall file sizes too small!')
+    elif n_blocks0 + n_blocks1 > MAX_VOLUME_BLOCKS + 1:  # ...these cases
+        raise ValueError('overall file sizes too big!')
+    elif n_blocks0 + n_blocks1 < MAX_VOLUME_BLOCKS - 3:
+        n_volumes = 1
+    else:
+        n_volumes = 2
+
+    expected_offset0 = 0
+    file0_complete_in_vol0 = n_blocks0 < MAX_VOLUME_BLOCKS - 2
+    # file 0 is completed in volume 0 --> file 1 starts in volume 0
+
+    if file0_complete_in_vol0:
+        expected_offset1 = (n_blocks0 + 1) * BLOCKSIZE
+    else:
+        # offset1 in volume1; 3 blocks in volume0 cannot be used by file0
+        expected_offset1 = ( n_blocks0 - (MAX_VOLUME_BLOCKS - 3) ) * BLOCKSIZE
+
+    # number of blocks written overall: data blocks + 2*info + 2*0-blocks
+    n_blocks_overall = n_blocks0 + n_blocks1 + 2 + 2
+
+    if n_volumes == 1:   # everything is expected to end up in one volume
+        expected_sizes = [n_blocks_overall * BLOCKSIZE, ]
+    elif file0_complete_in_vol0:
+        expected_sizes = [(n_blocks0 + 2) * BLOCKSIZE,
+                          (n_blocks_overall-n_blocks0-2)*BLOCKSIZE]
+    else:
+        n_blocks_in_vol0 = MAX_VOLUME_BLOCKS-2
+        expected_sizes = [n_blocks_in_vol0 * BLOCKSIZE,
+                          (n_blocks_overall-n_blocks_in_vol0) * BLOCKSIZE]
+
+    # create TarFile with max volume size approx 8 blocks
+    max_volume_size = MAX_VOLUME_BLOCKS * BLOCKSIZE + volume_size_offset
+    actual_sizes = []
+    volume_files = []
+    tar_handle = None
+    with NamedTemporaryFile(dir=temp_dir,
+                            suffix='.tar',
+                            delete=False) as tar_handle:
+        print('creating tar {} with max volume size {}'
+              .format(tar_handle.name, max_volume_size))
+        tarobj = TarFile.open(mode='w:',
+                              fileobj=tar_handle,
+                              max_volume_size=max_volume_size,
+                              new_volume_handler=new_volume_handler)
+
+        # add files, remember offsets
+        real_offset0 = tarobj.offset
+
+        # create and add file0
+        add_handle = None
+        with NamedTemporaryFile(dir=temp_dir, delete=False) as add_handle:
+            fill_file(add_handle, size0)
+            add_handle.close()
+            print('adding file of size {0} ({1} = {2[0]} BLOCKS + {2[1]})'
+                  .format(os.stat(add_handle.name).st_size, size0,
+                          divmod(size0, BLOCKSIZE)))
+            tarobj.add(add_handle.name, arcname='file0')
+        if add_handle:
+            os.unlink(add_handle.name)
+        real_offset1 = tarobj.offset
+
+        # create and add file1
+        add_handle = None
+        with NamedTemporaryFile(dir=temp_dir, delete=False) as add_handle:
+            fill_file(add_handle, size1)
+            add_handle.close()
+            print('adding file of size {0} ({1} = {2[0]} BLOCKS + {2[1]})'
+                  .format(os.stat(add_handle.name).st_size, size1,
+                          divmod(size1, BLOCKSIZE)))
+            tarobj.add(add_handle.name, arcname='file1')
+        if add_handle:
+            os.unlink(add_handle.name)
+
+        # close tar file
+        tarobj.close()
+
+    # get volume file sizes (tar_handle is closed, i.e. flushed, by now)
+    volume_files = sorted(glob(tar_handle.name + "*"))
+    for volume_file in volume_files:
+        actual_sizes.append(os.stat(volume_file).st_size)
+        print('found volume {} of size {}'
+              .format(volume_file, actual_sizes[-1]))
+
+    for volume_file in volume_files:
+        os.unlink(volume_file)
+    # now all temp files should be deleted again
+
+    # check expectation
+    found_err = False
+    if len(actual_sizes) != n_volumes:
+        found_err = True
+        print('wrong number of volumes: {} != {}'
+              .format(len(actual_sizes), n_volumes))
+    for vol_idx, (actual_size, expected_size) in \
+            enumerate(zip(actual_sizes, expected_sizes)):
+        if actual_size != expected_size:
+            found_err = True
+            print('wrong size for volume {}: {} != {}'
+                  .format(vol_idx, actual_size, expected_size))
+    if real_offset0 != expected_offset0:
+        found_err = True
+        print('wrong offset for file0: {} != {}'
+              .format(real_offset0, expected_offset0))
+    if real_offset1 != expected_offset1:
+        found_err = True
+        print('wrong offset for file1: {} != {}'
+              .format(real_offset1, expected_offset1))
+
+    return not found_err
 
 
 def main():
@@ -48,8 +241,15 @@ def main():
 
     see module doc for more info
     """
-    raise NotImplementedError()
+
+    n_errs = 0
+    with TemporaryDirectory() as temp_dir:
+        test_succeeded = test(temp_dir, 3*BLOCKSIZE, 1*BLOCKSIZE)
+        if not test_succeeded:
+            n_errs += 1
+
+    return n_errs    # handed to sys.exit below: 0 means no test failed
 
 
 if __name__ == '__main__':
-    main()
+    sys.exit(main())
-- 
1.7.1