#!/usr/bin/env python3

""" Test performance of compressed write

Creates a single big volume with random files added

Results on my devel box: ~0.055s/MB --> ~1 minute per GB
compress ratio ~1.15
adding encryption had no noticeable effect
switching mode between w|gz or w#gz had no noticeable effect
switching to bzip2 reduced speed to ~0.174s/MB, compression ratio approx. equal

.. codeauthor:: Intra2net
"""

from tempfile import mkstemp
from time import perf_counter
import os

if __name__ == '__main__':
    # When run as a script, make the directory above this file's directory
    # importable so that the imports below can be resolved.
    from os.path import dirname, abspath
    import sys
    parent_dir = dirname(dirname(abspath(__file__)))
    sys.path.insert(0, parent_dir)
    print('pre-prended {} to sys path'.format(parent_dir))

import deltatar
from deltatar.tarfile import TarFile
from test_multivol_compression_sizes import find_random_files


def main():
    """ Main function, called when running file as script

    Creates a temp file, opens it as a compressed, password-protected tar
    archive, adds files yielded by :py:func:`find_random_files` until the sum
    of their sizes is within `size_tol` of `goal_size`, closes the archive and
    prints timing and compression statistics.

    The archive is closed and the temp file is removed even if an error occurs
    along the way.

    see module doc for more info
    """

    mode = 'w#gz'
    suffix = '.tgz'
    # alternative goal: 650*1e6 (1 CD)
    goal_size = 2 * 1e9     # 2 GB (2/3 of space in my /tmp)
    size_added = 0
    size_tol = 32 * 1000    # 32k
    min_size = 5
    open_time = add_time = close_time = 0.0
    n_files_added = 0

    result_file_name = None
    file_handle = None
    tarobj = None
    try:
        # create temp file; only its name is needed, so the descriptor is
        # closed right away
        file_handle, result_file_name = mkstemp(
            prefix='deltatar_multivol_cmp_tst_', suffix=suffix)
        os.close(file_handle)
        file_handle = None

        print('opening temp file ' + result_file_name)
        start = perf_counter()
        tarobj = TarFile.open(result_file_name, mode=mode,
                              password='test1234')
        end = perf_counter()
        open_time = end - start

        for add_file_name in find_random_files(min_size):
            # check file
            if add_file_name.startswith(result_file_name[:-6]):
                continue    # do not accidentally add self
            file_size = os.lstat(add_file_name).st_size
            if file_size < min_size:
                continue
            if file_size + size_added > goal_size + size_tol:
                continue    # new file is too big

            # do add
            start = perf_counter()
            tarobj.add(add_file_name)
            end = perf_counter()
            add_time += (end - start)

            # update sizes and counts
            size_added += file_size
            n_files_added += 1
            #print('added file of size {:9d}, {:9d} left (file name: {})'
            #      .format(file_size, goal_size-size_added, add_file_name))
            if n_files_added % 100 == 0:
                print('added {:4d} files of overall size {:6.1f}MB, {:6.1f}MB '
                      'left ({:4.1f}%); avg time to add per MB: {:.3f}s'
                      .format(n_files_added, size_added/1.e6,
                              (goal_size-size_added)/1.e6,
                              size_added / goal_size * 100.,
                              add_time/size_added*1.e6))

            if size_added > goal_size - size_tol:
                break

        print('closing file')
        # Hand the object over to a local name first, so that the cleanup
        # code below does not try to close it a second time if close() fails.
        open_tar, tarobj = tarobj, None
        start = perf_counter()
        open_tar.close()
        end = perf_counter()
        close_time = end - start

        result_size = os.stat(result_file_name).st_size

        # summarize
        print('time to open/close the tar file: {:.3f} / {:.3f}ms'
              .format(open_time*1000., close_time*1000.))
        if n_files_added == 0 or size_added == 0 or result_size == 0:
            # all averages and ratios below would divide by zero
            print('no files were added, cannot compute statistics')
            return
        print('time to add {} files: {:.3f}s (avg {:.3f}ms per file)'
              .format(n_files_added, add_time,
                      add_time / n_files_added * 1000.))
        print('average added file size: {:.3f}KB'
              .format(size_added/n_files_added/1000.))
        print('time to add per MB: {:.3f}s'.format(add_time/size_added*1.0e6))
        print('size of result file: {} --> compression ratio {:.2f}'
              .format(result_size, size_added/result_size))
    finally:
        try:
            # only still set if an error occurred before the regular close
            if tarobj is not None:
                tarobj.close()
        finally:
            # 0 is a valid file descriptor, so compare against None
            if file_handle is not None:
                os.close(file_handle)
            if result_file_name is not None:
                # del temp file
                os.unlink(result_file_name)


if __name__ == '__main__':
    main()