""" Check very accurately the splitting of files into volumes; not a unit test
Check:
-- behaviour for max_volume_sizes % BLOCK_SIZE != 0
+- behaviour for max_volume_sizes % BLOCKSIZE != 0
- file sizes very close to size remaining in volume
By doing the following:
-- create a multi-volume archive with max_volume_size % BLOCK_SIZE == 0
+- create a multi-volume archive with max_volume_size % BLOCKSIZE == 0
- add a file that nearly fills the volume
- add a small file that should just fit in or not
- check expected number and size of volumes
that is repeatable (--> :py:mod:`create_pseudo_random_files`) and experiment a
bit for suitable seeds and sizes
-e.g. for max_volume_size = 8 * BLOCK_SIZE:
-
-block | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
- +-------+-------+-------+-------+-------+-------+-------+-------+
-all fit into first volume
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Info1 | Dat10 | 0 | 0 |
- +-------+-------+-------+-------+-------+-------+-------+-------+
-
-file0 needs next block:
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Info1 | Dat10 | (ends early)
-vol1: | 0 | 0 | | | | | | |
- +-------+-------+-------+-------+-------+-------+-------+-------+
-
-file0 needs 2 more blocks
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | Info1 | Dat10 |
-vol1: | 0 | 0 | | | | | | |
- +-------+-------+-------+-------+-------+-------+-------+-------+
-
-file0 needs 3 more blocks
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | (ends early)
-vol1: | Dat05 | Info1 | Dat10 | 0 | 0 | | | |
- +-------+-------+-------+-------+-------+-------+-------+-------+
-
-file0 regular, file1 needs next block:
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Info1 | Dat10 | (ends early)
-vol1: | Dat11 | 0 | 0 | | | | | |
- +-------+-------+-------+-------+-------+-------+-------+-------+
-
-both need next block:
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Info1 | Dat10 | (ends early)
-vol1: | Dat11 | 0 | 0 | | | | | |
- +-------+-------+-------+-------+-------+-------+-------+-------+
-
-single huge file
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | (ends early)
-vol1: | Dat05 | Dat06 | Dat07 | Dat08 | Dat09 | Dat0A | (ends early)
-vol2: | ... | | | | | | | |
- +-------+-------+-------+-------+-------+-------+-------+-------+
+e.g. for max_volume_size = RECORDSIZE + 1 * BLOCKSIZE
+
+File 0 has Info0 and blocks Dat00, Dat01, ... Dat0K, (Dat0L, Dat0M, Dat0N)
+File 1 has Info1 and blocks Dat10, (Dat11)
+ end of end of
+ RECORD volume
+block | 0 | 1 | 2 | ... | -5 | -4 | -3 | -2 | -1 |
+ +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+0: all fit into first record: blocks = [N_BLOCKS_PER_RECORD-5, 1]
+vol0: | Info0 | Dat00 | Dat01 | ... | Info1 | Dat10 | 0 | 0 | |
+ +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+
+1: all fit into first volume: blocks = [MAX_VOLUME_BLOCKS-5, 1]
+vol0: | Info0 | Dat00 | Dat01 | ... | Dat0K | Info1 | Dat10 | 0 | 0 |
+ +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+
+2: file0 needs next block: blocks = [MAX_VOLUME_BLOCKS-4, 1]
+vol0: | Info0 | Dat00 | Dat01 | ... | Dat0K | Dat0L | Info1 | Dat10 | 0 | 0
+ +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+
+3: file0 needs 2 more blocks: blocks = [MAX_VOLUME_BLOCKS-3, 1]
+vol0: | Info0 | Dat00 | Dat01 | ... | Dat0K | Dat0L | Dat0M | Info1 | Dat10 |00
+ +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+
+4: file0 needs 3 more blocks: blocks = [MAX_VOLUME_BLOCKS-2, 1]
+vol0: | Info0 | Dat00 | Dat01 | ... | Dat0K | Dat0L | Dat0M | wasted space |
+vol1: | VOL | Dat0N | Info1 | Dat10 | 00
+ +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+
+5: file0 regular, file1 needs next block: blocks = [MAX_VOLUME_BLOCKS-5, 2]
+vol0: | Info0 | Dat00 | Dat01 | ... | Dat0K | Info1 | Dat10 | | |
+vol1: | VOL | Dat11 | 0 | 0 | | | | | |
+ +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+
+6: both need next block: blocks = [MAX_VOLUME_BLOCKS-4, 2]
+vol0: | Info0 | Dat00 | Dat01 | ... | Dat0K | Dat0L | Info1 | Dat10 | |
+vol1: | VOL | Dat11 | 0 | 0 | | | | | |
+ +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+
+single huge file: blocks = [MAX_VOLUME_BLOCKS * 3,]
+vol0: | Info0 | Dat00 | Dat01 | ... | Dat02 | Dat03 | Dat04 | wasted space |
+vol1: | VOL | Dat05 | Dat06 | Dat07 | ... | Dat08 | Dat09 | wasted space |
+vol2: | ... | | | | | | | | |
+ +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
.. codeauthor:: Intra2net AG <info@intra2net>
"""
import os
+from os.path import dirname, abspath
import sys
from math import ceil
from glob import glob
from tempfile import NamedTemporaryFile, TemporaryDirectory
-from deltatar.tarfile import TarFile, BLOCKSIZE
+
+# try to import the tarfile from source, not the globally installed one
+source_base = dirname(dirname(abspath(__file__)))
+print('adding {} to python path'.format(source_base))
+if os.path.isdir(source_base):
+ sys.path.insert(0, source_base)
+import inspect
+from deltatar.tarfile import TarFile, BLOCKSIZE, RECORDSIZE
+print('using TarFile from ' + dirname(inspect.getsourcefile(TarFile)))
+
+
+#: number of blocks in a record
+N_BLOCKS_PER_RECORD = RECORDSIZE // BLOCKSIZE
#: number of blocks per tar volume file
-MAX_VOLUME_BLOCKS = 8
+MAX_VOLUME_BLOCKS = N_BLOCKS_PER_RECORD + 1
def fill_file(file_handle, data_size):
def new_volume_handler(tarobj, base_name, volume_number):
    """ Called from tarobj when the current volume is full.

    Closes the finished volume's file object, then opens the next volume
    file named ``<base_name>.<volume_number>``.

    :param tarobj: the TarFile that ran out of space in its current volume
    :param str base_name: base path of the archive; volume files are derived
                          from it by appending ``.<volume_number>``
    :param int volume_number: number of the volume to create
    """
    # close the finished volume before switching to the next file
    tarobj.fileobj.close()
    volume_path = "%s.%d" % (base_name, volume_number)
    print('new-volume handler: creating volume {}'.format(volume_path))
    tarobj.open_volume(volume_path)
def size_str(size):
    """ Return string 'N (= b BLKs + m)' for a byte count.

    :param int size: number of bytes
    :returns: string giving the raw byte count plus its split into whole
              blocks and remainder (block size is the module-level BLOCKSIZE)
    """
    return '{} (= {} BLKs + {})'.format(size, *divmod(size, BLOCKSIZE))
def test(temp_dir, file_blocks, volume_blocks_arg, offset_blocks,
         file_size_offsets=(0, 0), volume_size_offset=0):
    """ Create a multi-volume TarFile with given config and check the layout.

    :param str temp_dir: directory that receives all temporary files
    :param file_blocks: per input file, the number of BLOCKSIZE blocks its
                        data should occupy inside the tar
    :param volume_blocks_arg: expected size of each volume in blocks; a
                              trailing 0 entry means the last volume may be
                              absent on disk
    :param offset_blocks: expected block offset of each file's header block,
                          plus (last entry) the offset of the closing 0-blocks
    :param file_size_offsets: per file, bytes subtracted from the full-block
                              size; must lie in [0, BLOCKSIZE-1]
    :param int volume_size_offset: bytes added to the volume size; must lie
                                   in [0, BLOCKSIZE-1] so it cannot change
                                   the expected block layout
    :returns: True if all offsets and volume sizes matched expectation
    :raises ValueError: for offsets outside [0, BLOCKSIZE-1] or mismatched
                        lengths of file_blocks and file_size_offsets
    """
    # validate arguments up front
    if not (0 <= volume_size_offset < BLOCKSIZE):
        raise ValueError('volume size offset outside allowed interval '
                         '[0, BLOCKSIZE-1]: {}'.format(volume_size_offset))
    for idx, size_offset in enumerate(file_size_offsets):
        if not (0 <= size_offset < BLOCKSIZE):
            raise ValueError('size offset for file {} outside allowed interval '
                             '[0, BLOCKSIZE-1]: {}'
                             .format(idx, size_offset))
    if len(file_blocks) != len(file_size_offsets):
        raise ValueError('need same number of file block sizes and offsets!')

    max_volume_size = MAX_VOLUME_BLOCKS * BLOCKSIZE + volume_size_offset
    actual_sizes = []
    actual_offsets = []
    volume_files = []
    tar_handle = None
    with NamedTemporaryFile(dir=temp_dir,
                            suffix='.tar',
                            mode='wb',
                            delete=False) as tar_handle:
        # create TarFile
        print('creating tar {} with max volume size {}'
              .format(tar_handle.name, size_str(max_volume_size)))
        tarobj = TarFile.open(mode='w:',
                              fileobj=tar_handle,
                              max_volume_size=max_volume_size,
                              new_volume_handler=new_volume_handler)

        # add files, remember offsets
        for idx, (size_blocks, size_offset) \
                in enumerate(zip(file_blocks, file_size_offsets)):

            # remember offset of this file's header block
            actual_offsets.append(tarobj.offset)

            # create file of requested size (full blocks minus size_offset)
            add_handle = None
            file_size = size_blocks * BLOCKSIZE - size_offset
            with NamedTemporaryFile(dir=temp_dir, delete=False) as add_handle:
                fill_file(add_handle, file_size)
            add_handle.close()
            print('adding file of size {} at offset {}'
                  .format(size_str(file_size), size_str(tarobj.offset)))

            # add file
            tarobj.add(add_handle.name, arcname='file{}'.format(idx))

            # remove file
            if add_handle:
                os.unlink(add_handle.name)

        # remember offset where 0s should be added
        actual_offsets.append(tarobj.offset)

        # close tar file
        print('before close: offset is ' + size_str(actual_offsets[-1]))
        tarobj.close()
        tar_handle.close()
        # BUGFIX: original concatenated the literal '{}' to the size string
        # instead of formatting it, producing output like "...is {}512..."
        print('after close: offset is {}'.format(size_str(tarobj.offset)))

    # get volume file sizes
    volume_files = sorted(glob(tar_handle.name + "*"))
    for volume_file in volume_files:
        actual_size = os.stat(volume_file).st_size
        print('found volume {} of size {}'
              .format(volume_file, size_str(actual_size)))
        actual_sizes.append(actual_size)
    for volume_file in volume_files:
        os.unlink(volume_file)
    # now all temp files should be deleted again

    # check expectation
    everything_ok = True
    if len(actual_offsets) != len(offset_blocks):
        everything_ok = False
        print('have {} actual offsets but expected {}!'
              .format(len(actual_offsets), len(offset_blocks)))
    for idx, (actual_offset, expected_block) \
            in enumerate(zip(actual_offsets, offset_blocks)):
        if actual_offset != expected_block * BLOCKSIZE:
            everything_ok = False
            print('wrong offset for file {}: {} != {}'
                  .format(idx, size_str(actual_offset),
                          size_str(expected_block*BLOCKSIZE)))

    # last volume is filled up to RECORDSIZE, so round the expectation for
    # the last real volume up to a whole record before comparing
    # NOTE(review): the branch conditions assume one more file on disk than
    # expected volumes in the multi-volume case -- presumably because glob
    # matches the base tar plus per-volume files; confirm against deltatar's
    # volume naming before changing this logic
    volume_blocks = list(volume_blocks_arg)
    if (len(actual_sizes) == len(volume_blocks)-1) \
            and (volume_blocks[-1] == 0):
        actual_sizes.append(0)
        volume_blocks[-2] = ceil(volume_blocks[-2] / N_BLOCKS_PER_RECORD) \
            * N_BLOCKS_PER_RECORD
    elif len(actual_sizes) == len(volume_blocks) + 1:
        volume_blocks[-1] = ceil(volume_blocks[-1] / N_BLOCKS_PER_RECORD) \
            * N_BLOCKS_PER_RECORD
    else:
        everything_ok = False
        print('wrong number of volumes: {} != {}'
              .format(len(actual_sizes)-1, len(volume_blocks)))

    for idx, (actual_size, expected_blocks) in \
            enumerate(zip(actual_sizes, volume_blocks)):
        if actual_size != expected_blocks * BLOCKSIZE:
            everything_ok = False
            print('wrong size for volume {}: {} != {}'
                  .format(idx, size_str(actual_size),
                          size_str(expected_blocks * BLOCKSIZE)))

    return everything_ok
def main():
see module doc for more info
"""
+ N = N_BLOCKS_PER_RECORD
+ M = MAX_VOLUME_BLOCKS
+
+ # define tests by numbers of blocks:
+ # n_blocks file 0, 1; n_blocks vol0, 1, offset Info0, Info1, 0-blocks
+ tests = (((N-5, 1), (N, 0), (0, N-4, N-2)),
+ ((M-5, 1), (M, 0), (0, M-4, M-2)),
+ ((M-4, 1), (M+1, 0), (0, M-3, M-1)),
+ ((M-3, 1), (M+2, 0), (0, M-2, M)),
+ ((M-2, 1), (M-2, 6), (0, 2, 4)),
+ ((M-5, 2), (M-2, 4), (0, M-4, 2)),
+ ((M-4, 2), (M-1, 4), (0, M-3, 2)))
+
n_errs = 0
with TemporaryDirectory() as temp_dir:
- test_succeeded = test(temp_dir, 3*BLOCKSIZE, 1*BLOCKSIZE)
- if not test_succeeded:
- n_errs += 1
+ for size_comb_idx, (file_sizes, vol_sizes, offsets) \
+ in enumerate(tests):
+ print('-' * 72)
+ print('size combination {}: ({}, {})'
+ .format(size_comb_idx, *file_sizes))
+ test_succeeded = test(temp_dir, file_sizes, vol_sizes, offsets)
+ if not test_succeeded:
+ n_errs += 1
return n_errs