from tempfile import TemporaryDirectory, NamedTemporaryFile
from time import time
from traceback import format_exc
-from os.path import dirname, abspath
+from os.path import dirname, abspath, basename
# try to import the tarfile from source, not the globally installed one
source_base = dirname(dirname(abspath(__file__)))
#: number of blocks per tar volume file
MAX_VOLUME_BLOCKS = N_BLOCKS_PER_RECORD + 1
-#: size of big file
-BIG_SIZE = MAX_VOLUME_BLOCKS * BLOCKSIZE
+#: size of big file: fits into first volume even if not compressed
+BIG_SIZE = (MAX_VOLUME_BLOCKS-3) * BLOCKSIZE
#: max size of small files
SMALL_MAX_SIZE = 2 * BLOCKSIZE
returned hash must be compatible with :py:func:`hash_file`
"""
hash_obj = hash_type()
+ if random.getrandbits(1) == 1:
+ fill_type = 0
+ else:
+ fill_type = 1
with NamedTemporaryFile(dir=temp_dir, delete=False) as temp_file:
- if random.getrandbits(1) == 1:
+ if fill_type == 0:
fill_file_repetitive(temp_file, file_size, hash_obj)
- else:
+ elif fill_type == 1:
fill_file_random(temp_file, file_size, hash_obj)
+ else:
+ raise ValueError('unexpected fill type!')
- return temp_file.name, hash_obj.hexdigest()
+ return temp_file.name, hash_obj.hexdigest(), fill_type
def fill_file_repetitive(temp_file, file_size, hash_obj):
""" fills file with repetitive data """
bytes_written = 0
data = bytes(range(256))
- while file_size-bytes_written < 256:
+ while file_size-bytes_written > 256:
temp_file.write(data)
hash_obj.update(data)
bytes_written += 256
""" fills file with randomized data """
bytes_written = 0
data = bytearray(range(256))
- while file_size-bytes_written < 255:
+ while file_size-bytes_written > 255:
random.shuffle(data)
temp_file.write(data[:-1]) # write all but last to make a difference
hash_obj.update(data[:-1]) # between files of same size
# define the volume handler locally so that it can read/write volume_handler_called
volume_handler_called = False
+ offset_end_vol0 = None
def new_volume_handler(tarobj, base_name, volume_number):
""" called from tarobj when creating a new volume """
nonlocal volume_handler_called
+ nonlocal offset_end_vol0
volume_handler_called = True
+ offset_end_vol0 = tarobj.offset
volume_path = "%s.%d" % (base_name, volume_number)
tarobj.open_volume(volume_path)
size_left_func = tarobj._size_left_file
# add big file
- big_name, big_hash = create_file(BIG_SIZE, temp_dir)
+ big_name, big_hash, file_info = create_file(BIG_SIZE, temp_dir)
files[big_name] = big_hash
- dprnt('adding big file {} of size {} with hash {}'
- .format(big_name, BIG_SIZE, big_hash))
- tarobj.add(big_name)
+ dprnt('adding big file {} of size {}, info {} with hash {}'
+ .format(big_name, BIG_SIZE, file_info, big_hash))
+ tarobj.add(big_name, arcname=basename(big_name))
dprnt('now offset={}, size_left={}'
.format(tarobj.offset, size_left_func()))
while not volume_handler_called:
# add small file
small_size = random.randint(0, SMALL_MAX_SIZE)
- small_name, small_hash = create_file(small_size, temp_dir)
+ small_name, small_hash, file_info = create_file(small_size,
+ temp_dir)
files[small_name] = small_hash
- dprnt('adding small file {} of size {} with hash {}'
- .format(small_name, small_size, small_hash))
- tarobj.add(small_name)
+ dprnt('adding small file {} of size {}, info {} with hash {}'
+ .format(small_name, small_size, file_info, small_hash))
+ tarobj.add(small_name, arcname=basename(small_name))
dprnt('now offset={}, size_left={}'
.format(tarobj.offset, size_left_func()))
# close tarobj -- happens in __exit__ of TarFile context
dprnt('closing tar file')
# now tarobj should be closed
+ if not temp_file.closed:
+ dprnt('closing temp file -- this should have happened at volume '
+ 'change!')
+ temp_file.close()
# remember size of first volume (2nd should always be RECORDSIZE)
- dprnt('size of first volume file: {}'
- .format(os.stat(temp_file.name).st_size))
+ dprnt('size of first volume file: {}; offset at vol change: {}'
+ .format(os.stat(temp_file.name).st_size, offset_end_vol0))
if os.stat(temp_file.name + ".1").st_size != RECORDSIZE:
everything_ok = False
dprnt('strange size of 2nd volume: {}'
if len(os.listdir(temp_dir)) != len(files)+2 + n_files_at_start:
everything_ok = False
dprnt('wrong number of files: found {} but expect {}!'
- .format(len(os.listdir()), len(files)+2+n_files_at_start))
- for file_name in os.listdir():
+ .format(len(os.listdir(temp_dir)),
+ len(files)+2+n_files_at_start))
+ for file_name in os.listdir(temp_dir):
dprnt('listdir: {}'.format(file_name))
- for file_name, file_hash in files:
+ for file_name, file_hash in files.items():
if not os.path.exists(file_name):
everything_ok = False
dprnt('failed to find file {} after extraction'
.format(file_name))
+ continue
if hash_file(file_name) != file_hash:
everything_ok = False
dprnt('wrong hash for file {} after extraction: {} != {}'
# more params
fast_fail = True
- print_everything = False
- modes = 'w:tar', 'w:gz', 'w:bz2', 'w|tar', 'w|gz', 'w|bz2', 'w#tar', \
+ print_everything = True
+ modes = 'w:tar', 'w|tar', 'w|gz', 'w|bz2', 'w#tar', \
'w#gz', #'w#gz.aes128', 'w#gz.aes256', 'w#aes128', 'w#aes256'
+ # not currently working: 'w:gz', 'w:bz2',
# seed properly
random.seed()
.format(n_runs, (time()-start_time)/n_runs))
except KeyboardInterrupt:
print('Stopped by user')
+ for line in format_exc().splitlines():
+ print(line)
# summarize
print('')