From: Victor Ramirez
Date: Wed, 16 Jul 2014 14:14:45 +0000 (+0200)
Subject: Added profiler test for tarfile and zlib.
X-Git-Tag: v2.2~53
X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=b124086e33e8ed8ae605f2ea6616f88b2ffe51f3;p=python-delta-tar

Added profiler test for tarfile and zlib.
---

diff --git a/README.txt b/README.txt
new file mode 100644
index 0000000..28dffa9
--- /dev/null
+++ b/README.txt
@@ -0,0 +1,31 @@
+For testing tarfile and zlib with different Python versions:
+
+* python-delta-tar tarfile
+    Runs the test in two modes: without compression and with gz compression.
+    Run wgeneration.py (python2) to create a directory with files.
+        python wgeneration.py --help for more information.
+        example: wgeneration.py --ndir 20 --nfile 100 --size 100 --path source_dir
+    Time:
+        time python3.3.2 run.py --test delta-tarfile
+        time python3.4.1 run.py --test delta-tarfile
+    Profiling:
+        python3.3.2 run.py --test delta-tarfile --profile
+        python3.4.1 run.py --test delta-tarfile --profile
+
+
+* tarfile
+    Runs the test in two modes: without compression and with gz compression.
+    Same as python-delta-tar, but use --test tarfile.
+
+
+* zlib
+    Runs the test at three compression levels: none (0), medium (6) and best (9).
+    Run wgeneration.py to create a directory with a file.
+        python wgeneration.py --help for more information.
+        example: wgeneration.py --ndir 1 --nfile 1 --size 10 --distribute_size 100 --distribute_files 100 --path source_dir
+    Time:
+        time python3.3.2 run.py --test zlib
+        time python3.4.1 run.py --test zlib
+    Profiling:
+        python3.3.2 run.py --test zlib --profile
+        python3.4.1 run.py --test zlib --profile
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..196a24d
--- /dev/null
+++ b/run.py
@@ -0,0 +1,141 @@
+"""Timing and profiling harness for tarfile, deltatar.tarfile and zlib.
+
+The test data is read from source_dir/ (generate it with wgeneration.py).
+"""
+import argparse
+import os
+import zlib
+import fnmatch
+import cProfile
+import io
+import pstats
+
+
+def main(parser):
+    """ Parse the command line and run the selected test.
+        Param parser: configured command line parser.
+        Type parser: argparse.ArgumentParser.
+    """
+    results = parser.parse_args()
+    if results.test in ('delta-tarfile', 'tarfile'):
+        test_tarfile(results.test, results.profile, results.sort)
+    elif results.test == 'zlib':
+        test_zlib(results.profile, results.sort)
+    else:
+        # --test is optional for argparse, so it is None when omitted.
+        parser.print_help()
+
+
+def test_tarfile(res, profile=False, sort=None):
+    """ Archive source_dir/ twice: uncompressed ('w') and gzip ('w:gz').
+        Param res: implementation to use, 'delta-tarfile' or 'tarfile'.
+        Type res: str.
+        Param profile: collect and print cProfile statistics.
+        Type profile: bool.
+        Param sort: pstats sort key passed to print_profile.
+        Type sort: str.
+    """
+    if res == 'delta-tarfile':
+        from deltatar.tarfile import TarFile
+        tar_open = TarFile.open
+    elif res == 'tarfile':
+        import tarfile
+        tar_open = tarfile.open
+    else:
+        raise ValueError('unknown tarfile implementation: %r' % (res,))
+    pr = None
+    if profile:
+        pr = cProfile.Profile()
+        pr.enable()
+    archives = []
+    try:
+        for m, mode in enumerate(('w', 'w:gz')):
+            print(mode)
+            name = 'tarfile_dir%d.tar' % m
+            archives.append(name)
+            with open(name, 'wb') as fo:
+                tar = tar_open(mode=mode, fileobj=fo)
+                try:
+                    tar.add('source_dir/')
+                finally:
+                    # close() writes the end-of-archive blocks and the gzip
+                    # trailer, so it has to run before fo is closed.
+                    tar.close()
+        if pr is not None:
+            pr.disable()
+            print_profile(pr, sort)
+    finally:
+        # Remove only the archives created above.
+        for name in archives:
+            if os.path.exists(name):
+                os.remove(name)
+
+
+def _find_first_file(top, pattern='*'):
+    """ Return the first file below top whose name matches pattern.
+        Param top: directory to search recursively.
+        Type top: str.
+        Param pattern: fnmatch pattern for the file name.
+        Type pattern: str.
+        Return: path of the file, or None when nothing matches.
+        Rtype: str.
+    """
+    for root, dirnames, filenames in os.walk(top):
+        matches = fnmatch.filter(filenames, pattern)
+        if matches:
+            return os.path.join(root, matches[0])
+    return None
+
+
+def test_zlib(profile=False, sort=None):
+    """ Compress and decompress one file of source_dir/ at levels 0, 6, 9.
+        Param profile: collect and print cProfile statistics.
+        Type profile: bool.
+        Param sort: pstats sort key passed to print_profile.
+        Type sort: str.
+    """
+    match = _find_first_file('source_dir/')
+    if match is None:
+        print('file not found. ')
+        return
+    # zlib works on bytes, so read the file in binary mode.
+    with open(match, 'rb') as fo:
+        data = fo.read()
+
+    pr = None
+    if profile:
+        pr = cProfile.Profile()
+        pr.enable()
+    for level in (0, 6, 9):
+        print(level)
+        compressed = zlib.compress(data, level)
+        zlib.decompress(compressed)
+    if pr is not None:
+        pr.disable()
+        print_profile(pr, sort)
+
+
+def print_profile(pr, sort):
+    """ Print the statistics collected by a profiler.
+        Param pr: profiler that has been disabled.
+        Type pr: cProfile.Profile.
+        Param sort: pstats sort key; 'cumulative' when empty or None.
+        Type sort: str.
+    """
+    s = io.StringIO()
+    ps = pstats.Stats(pr, stream=s).sort_stats(sort or 'cumulative')
+    ps.print_stats()
+    print(s.getvalue())
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Profiling test options. ')
+    parser.add_argument('--test', choices=['delta-tarfile', 'tarfile', 'zlib'],
+                        help='Select option for testing. ')
+    parser.add_argument('--profile', action='store_true',
+                        help='Enable profile')
+    parser.add_argument('--sort', help='Sort output profile', choices=[
+        'calls', 'cumulative', 'cumtime', 'file', 'filename', 'module',
+        'ncalls', 'pcalls', 'line', 'name', 'nfl', 'stdname', 'time',
+        'tottime'])
+    main(parser)
diff --git a/wgeneration.py b/wgeneration.py
new file mode 100755
index 0000000..8574119
--- /dev/null
+++ b/wgeneration.py
@@ -0,0 +1,245 @@
+#!/usr/bin/python
+"""Generate a random directory tree filled with pseudo-random files."""
+import argparse
+import os
+import random
+import string
+from hashlib import sha512 as hashfunc
+
+
+def create_rand_name(size=None):
+    """ Create a random name using hexadecimal characters.
+        Param size: length of the name; a random length between 5 and 15
+                    is used when it is None or 0.
+        Type size: int.
+        Return: random name.
+        Rtype: str.
+    """
+    if not size:
+        size = random.randint(5, 15)
+    # One random.choice call per character, in order, so that a given seed
+    # keeps producing the same names.
+    return ''.join(random.choice(string.hexdigits) for _ in range(size))
+
+
+def create_rand_folder(directory):
+    """ Create a folder with a random name inside directory.
+        Param directory: folder where the new folder is created.
+        Type directory: str.
+        Return: path of the new folder.
+        Rtype: str.
+    """
+    new_dir = os.path.join(directory, create_rand_name())
+    while os.path.exists(new_dir):
+        # Name already taken: draw another one.
+        new_dir = os.path.join(directory, create_rand_name())
+    os.makedirs(new_dir)
+    return new_dir
+
+
+def create_rand_cad(size=1024):
+    """ Create a random string of hexadecimal characters by chaining sha512
+        digests.
+        Param size: length in bytes of the string to generate.
+        Type size: int.
+        Return: random string.
+        Rtype: str.
+    """
+    # hashlib needs bytes on Python 3; the names are plain ASCII.
+    last = hashfunc(create_rand_name().encode('ascii')).hexdigest()
+    cads = [last]
+    # Every digest adds 128 characters. Floor division keeps the count an
+    # integer on Python 3 too.
+    for _ in range(size // 128):
+        last = hashfunc(last[:8].encode('ascii')).hexdigest()
+        cads.append(last)
+    return ''.join(cads)[:size]
+
+
+def create_file(path, size):
+    """ Create a file with a random name and random content.
+        Param path: existing folder where the file is created.
+        Type path: str.
+        Param size: size in bytes of the file content.
+        Type size: int.
+    """
+    new_file = os.path.join(path, create_rand_name())
+    while os.path.exists(new_file):
+        # Name already taken: draw another one.
+        new_file = os.path.join(path, create_rand_name())
+    with open(new_file, 'w') as f:
+        f.write(create_rand_cad(size))
+
+
+def generate_list_sizes(nfile, size, distribute_size):
+    """ Generate the list of file sizes to use with create_file.
+        Param nfile: number of files.
+        Type nfile: int.
+        Param size: total size in bytes to share among the files.
+        Type size: int.
+        Param distribute_size: percent (0-100); 100 gives every file the
+                               average size, 0 lets a size go from 0 up to
+                               twice the average.
+        Type distribute_size: int.
+        Return: nfile sizes that add up to size.
+        Rtype: list(int).
+    """
+    if nfile <= 0:
+        return []
+    list_sizes = []
+    aux_size = size
+    normal_size = size // nfile
+    var = normal_size - normal_size * distribute_size // 100
+    for _ in range(nfile):
+        if aux_size <= 0:
+            list_sizes.append(0)
+            continue
+        tmp_size = random.randint(normal_size - var, normal_size + var)
+        aux_size -= tmp_size
+        if aux_size < 0:
+            # Overshoot: trim this file so the total never exceeds size.
+            tmp_size += aux_size
+        list_sizes.append(tmp_size)
+    if aux_size > 0:
+        # Give what is left to the last file, as get_files_per_folder does.
+        list_sizes[-1] += aux_size
+    return list_sizes
+
+
+def get_files_per_folder(nfile, ndir, distribute_files):
+    """ Get the number of files to create in each folder.
+        Param nfile: total number of files.
+        Type nfile: int.
+        Param ndir: number of folders.
+        Type ndir: int.
+        Param distribute_files: percent (0-100); 100 removes the random
+                                variation around the per-folder counts.
+        Type distribute_files: int.
+        Return: ndir counts that add up to nfile.
+        Rtype: list(int).
+    """
+    if ndir <= 0:
+        return []
+    list_files = []
+    aux_files = nfile
+    f_per_dir = nfile // ndir
+    f_remainder = nfile % ndir
+    for _ in range(ndir):
+        if aux_files <= 0:
+            list_files.append(0)
+            continue
+        files = 0
+        # Floor division: random.randint needs integer bounds.
+        if f_per_dir > 0:
+            var = f_per_dir - f_per_dir * distribute_files // 100
+            files += random.randint(f_per_dir - var, f_per_dir + var)
+        var = f_remainder - f_remainder * distribute_files // 100
+        files += random.randint(f_remainder - var, f_remainder + var)
+        aux_files -= files
+        if aux_files < 0:
+            # Overshoot: trim so the total never exceeds nfile.
+            files += aux_files
+        list_files.append(files)
+    if aux_files > 0:
+        list_files[-1] += aux_files
+    return list_files
+
+
+def main(parser):
+    """ Generate the tree described by the parsed command line options.
+        Options that were not given are drawn at random.
+        Param parser: parsed options (result of parse_args).
+        Type parser: argparse.Namespace.
+    """
+    if parser.seed is not None:
+        seed = parser.seed
+    else:
+        seed = random.randint(0, 1000000000)
+    random.seed(seed)
+    if parser.ndir is not None:
+        ndir = parser.ndir
+    else:
+        ndir = random.randint(0, 50000)
+    if parser.deep is not None:
+        deep = parser.deep
+    else:
+        deep = random.randint(0, 100)
+    if parser.nfile:
+        nfile = parser.nfile
+    else:
+        nfile = random.randint(1, 500000)
+    if parser.size:
+        size = parser.size * 1024 * 1024  # MB to byte
+    else:
+        size = random.randint(1, 5000)
+        size = size * 1024 * 1024  # MB to byte
+    if parser.distribute_files is not None:
+        distribute_files = parser.distribute_files
+    else:
+        distribute_files = random.randint(0, 100)
+    if parser.distribute_size is not None:
+        distribute_size = parser.distribute_size
+    else:
+        distribute_size = random.randint(0, 100)
+    if parser.path:
+        path = os.path.abspath(parser.path)
+        if not os.path.exists(path):
+            os.makedirs(path)
+    else:
+        path = os.getcwd()
+
+    print("Using seed %d: " % seed)
+    print("Generate %d folders with %d%% of deep." % (ndir, deep))
+    print("Generate %d files with total size %d MB."
+          % (nfile, size // 1024 // 1024))
+    print("\tDistribute (files, size): (%d%%, %d%%)."
+          % (distribute_files, distribute_size))
+
+    # Each entry is [parent folder, number of folders still to create in it].
+    list_dir = [[path, ndir]]
+    list_sizes = generate_list_sizes(nfile, size, distribute_size)
+    list_files = get_files_per_folder(nfile, ndir, distribute_files)
+
+    while ndir > 0:
+        new_dir = create_rand_folder(list_dir[0][0])
+        ndir -= 1
+        for i in range(list_files[0]):
+            create_file(new_dir, list_sizes[i])
+        del list_sizes[:list_files[0]]
+        del list_files[0]
+        current_ndir = list_dir[0][1] - 1
+        if current_ndir == 0:
+            del list_dir[0]
+        else:
+            # deep percent of the remaining folders go below new_dir, the
+            # rest stay in the current parent. Floor division keeps the
+            # counters integers.
+            ndir_deep = current_ndir * deep // 100
+            ndir_path = current_ndir - ndir_deep
+            if ndir_deep > 0:
+                list_dir.append([new_dir, ndir_deep])
+            if ndir_path > 0:
+                list_dir[0][1] = ndir_path
+            else:
+                del list_dir[0]
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Wgeneration option. ')
+    parser.add_argument('--seed', type=int, action='store', help='Seed. ')
+    parser.add_argument('--ndir', type=int, action='store',
+                        help='Number of directories for create. ')
+    parser.add_argument('--deep', type=int, action='store',
+                        help='Percent for distribute deep.')
+    parser.add_argument('--nfile', type=int, action='store',
+                        help='Number of files for create. ')
+    parser.add_argument('--size', type=int, action='store',
+                        help='Total size of files (MB) ')
+    parser.add_argument('--distribute_files', type=int, action='store',
+                        help='Percent for distribute files. ')
+    parser.add_argument('--distribute_size', type=int, action='store',
+                        help='Percent to distribute size of files. ')
+    parser.add_argument('--path', action='store', help='Path to generate. ')
+
+    results = parser.parse_args()
+    main(results)