From 3b8cae5abc1269336b09fe2a13143a9d69746ba0 Mon Sep 17 00:00:00 2001 From: Eduardo Robles Elvira Date: Tue, 22 Jul 2014 19:46:12 +0200 Subject: [PATCH] improving benchmark tests and their documentation --- docs/benchmark.txt | 31 ----- run.py | 84 ------------- testing/create_pseudo_random_files.py | 222 +++++++++++++++++++++++++++++++++ testing/run_benchmark.py | 103 +++++++++++++++ wgeneration.py | 221 -------------------------------- 5 files changed, 325 insertions(+), 336 deletions(-) delete mode 100644 docs/benchmark.txt delete mode 100644 run.py create mode 100755 testing/create_pseudo_random_files.py create mode 100755 testing/run_benchmark.py delete mode 100755 wgeneration.py diff --git a/docs/benchmark.txt b/docs/benchmark.txt deleted file mode 100644 index 28dffa9..0000000 --- a/docs/benchmark.txt +++ /dev/null @@ -1,31 +0,0 @@ -For testing tarfile and zlib with different python version: - -* python-delta-tar tarfile - Make test using two modes: without compress and with gz compress. - Run wgeneration.py for create a directory with files. (python2) - python wgeneration.py --help for more infomation. - example: wgeneration.py --ndir 20 --nfile 100 --size 100 --path source_dir - Time: - time python3.3.2 run.py --test delta-tarfile - time python3.4.1 run.py --test delta-tarfile - Profiling: - python3.3.2 run.py --test delta-tarfile --profile - python3.4.1 run.py --test delta-tarfile --profile - - -* tarfile - Make test using two modes: without compress and with gz compress. - Same as python-delta-tar, change for --test tarfile - - -* zlib - Make test using three modes: without compress(0), middle compress(6) and better compress(9) - Run wgeneration.py for create a directory with a file. - python wgeneration.py --help for more infomation. - example: wgeneration.py --ndir 1 --nfile 1 --size 10 --distribute_size 100 --distribute_files 100 --path source_dir - Time: - time python3.3.2 run.py --test zlib - time python3.4.1 run.py --test zlib - Profiling: - python3.3.2 run.py --test zlib --profile - python3.4.1 run.py --test zlib --profile diff --git a/run.py b/run.py deleted file mode 100644 index 196a24d..0000000 --- a/run.py +++ /dev/null @@ -1,84 +0,0 @@ -import argparse -import os -import zlib -import fnmatch -import cProfile -import io -import pstats - - -def main(parser): - results = parser.parse_args() - if results.test.endswith("tarfile"): - test_tarfile(results.test, results.profile, results.sort) - elif results.test == 'zlib': - test_zlib(results.profile, results.sort) - else: - parser.print_help() - - -def test_tarfile(res, profile=False, sort=None): - if res == 'delta-tarfile': - from deltatar.tarfile import TarFile - elif res == 'tarfile': - import tarfile as TarFile - if profile: - pr = cProfile.Profile() - pr.enable() - modes = ('w', 'w:gz') - for m in range(len(modes)): - print(modes[m]) - fo = open('tarfile_dir%d.tar' % m, 'wb') - tar = TarFile.open(mode=modes[m], fileobj=fo) - tar.add('source_dir/') - fo.close() - if profile: - pr.disable() - print_profile(pr, sort) - os.system('rm -rf tarfile_dir*') - - -def test_zlib(profile=False, sort=None): - for root, dirnames, filenames in os.walk('source_dir/'): - for filename in fnmatch.filter(filenames, '*'): - match = os.path.join(root, filename) - break - if not match: - print('file not found. ') - return - fo = open(match, 'r') - lines = fo.readlines() - fo.close() - - if profile: - pr = cProfile.Profile() - pr.enable() - for m in [0, 6, 9]: - print(m) - compress_lines = zlib.compress(bytes(''.join(lines), 'utf-8'), m) - decompress_lines = zlib.decompress(compress_lines) - if profile: - pr.disable() - print_profile(pr, sort) - - -def print_profile(pr, sort): - s = io.StringIO() - if not sort: - sort = 'cumulative' - ps = pstats.Stats(pr, stream=s).sort_stats(sort) - ps.print_stats() - print(s.getvalue()) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Profiling test options. ') - parser.add_argument('--test', choices=['delta-tarfile', 'tarfile', 'zlib'], - help='Select option for testing. ') - parser.add_argument('--profile', action='store_true', - help='Enable profile') - parser.add_argument('--sort', help='Sort output profile', choices=[ - 'calls', 'cumulative', 'cumtime', 'file', 'filename', 'module', - 'ncalls', 'pcalls', 'line', 'name', 'nfl', 'stdname', 'time', - 'tottime']) - main(parser) diff --git a/testing/create_pseudo_random_files.py b/testing/create_pseudo_random_files.py new file mode 100755 index 0000000..a16de77 --- /dev/null +++ b/testing/create_pseudo_random_files.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python3 + +import argparse +import os +import random +import string +from hashlib import sha512 as hashfunc + + +def create_rand_name(size=None): + """ Create rand name using alphanumeric char. + Param size: size of str name. + Type size: int. + Return: random name. + Rtype: str. + """ + if not size: + size = random.randint(5, 15) + name = '' + for x in range(size): + name += random.choice(string.hexdigits) + return name + + +def create_rand_folder(directory): + """ Create rand folder in directory. + Param directory: folder where create new rand folder. + Type directory: str. + Return: path with new directory. + Rtype: str. + """ + new_dir = os.path.join(directory, create_rand_name()) + if not os.path.exists(new_dir): + os.makedirs(new_dir) + else: + new_dir = create_rand_folder(directory) + return new_dir + + +def create_rand_cad(size=1024): + """ Create rand str using hexadecimal char and hash sha512. + Param size: size in byte of str for generate. + Type size: int. + Return: random name. + Rtype: str. + """ + cads = [hashfunc(create_rand_name().encode('utf-8')).hexdigest()] + last = cads[0] + for i in range(int(size / 128)): + last = hashfunc(last[:8].encode('utf-8')).hexdigest() + cads.append(last) + return ''.join(cads)[:size] + + +def create_file(path, size): + """ Get rand name using alphanumeric char. + Param size: size of str name. + Type size: int. + Return: random name. + Rtype: str. + """ + filename = create_rand_name() + new_file = os.path.join(path, filename) + if not os.path.exists(new_file): + with open(new_file, 'w') as f: + f.write(create_rand_cad(size)) + else: + create_file(path, size) + + +def generate_list_sizes(nfile, size, distribute_size): + """ Generate list of size for use it with create_files. + Param nfile: . + Type nfile: . + Param size: . + Type size: . + Param distribute_size: . + Type distribute_size: . + Return: . + Rtype: . + """ + list_sizes = [] + aux_size = size + normal_size = int(size / nfile) + var = max(1, int(normal_size - normal_size * distribute_size / 100)) + for s in range(nfile): + if aux_size <= 0: + list_sizes.append(0) + continue + tmp_size = random.randint(normal_size - var, normal_size + var) + aux_size -= tmp_size + if tmp_size < 0: + list_sizes.append(tmp_size + aux_size) + else: + list_sizes.append(tmp_size) + return list_sizes + + +def get_files_per_folder(nfile, ndir, distribute_files): + """ Get numbers of files for each folder. + Param nfile: . + Type nfile: . + Param ndir: . + Type ndir: . + Param distribute_files: . + Type distribute_files: . + Return: List of numbers. + Rtype: list(int). + """ + list_files = [] + aux_files = nfile + f_per_dir = int(nfile / ndir) + f_remainder = int(nfile % ndir) + for d in range(ndir): + if (aux_files <= 0): + list_files.append(0) + continue + files = 0 + if (f_per_dir > 0): + var = max(1, int(f_per_dir - f_per_dir * distribute_files / 100)) + files += random.randint(f_per_dir - var, f_per_dir + var) + var = f_remainder - f_remainder * distribute_files / 100 + files += random.randint(f_remainder - var, f_remainder + var) + aux_files -= files + if (aux_files <= 0): + list_files.append(files + aux_files) + continue + list_files.append(files) + if (aux_files > 0): + list_files[-1] += aux_files + return list_files + + +def main(parser): + if parser.seed is not None: + seed = parser.seed + else: + seed = random.randint(0, 1000000000) + random.seed(seed) + if parser.ndir is not None: + ndir = parser.ndir + else: + ndir = random.randint(0, 50000) + if parser.deep is not None: + deep = parser.deep + else: + deep = random.randint(0, 100) + if parser.nfile: + nfile = parser.nfile + else: + nfile = random.randint(1, 500000) + if parser.size: + size = parser.size * 1024 * 1024 # MB to byte + else: + size = random.randint(1, 5000) + size = size * 1024 * 1024 # MB to byte + if parser.distribute_files is not None: + distribute_files = parser.distribute_files + else: + distribute_files = random.randint(0, 100) + if parser.distribute_size is not None: + distribute_size = parser.distribute_size + else: + distribute_size = random.randint(0, 100) + if parser.path: + path = os.path.abspath(parser.path) + if not os.path.exists(path): + os.makedirs(path) + else: + path = os.getcwd() + + print("Using seed %d: " % seed) + print("Generate %d folders with %d%% of deep." % (ndir, deep)) + print("Generate %d files with total size %d MB." % (nfile, + size / 1024 / 1024)) + print("\tDistribute (files, size): (%d%%, %d%%)." % (distribute_files, + distribute_size)) + + list_dir = [[path, ndir, nfile]] + list_sizes = generate_list_sizes(nfile, size, distribute_size) + list_files = get_files_per_folder(nfile, ndir, distribute_files) + + while ndir > 0: + new_dir = create_rand_folder(list_dir[0][0]) + ndir -= 1 + for i in range(list_files[0]): + create_file(new_dir, list_sizes[i]) + del list_sizes[:list_files[0]] + del list_files[0] + current_ndir = list_dir[0][1] - 1 + if (current_ndir == 0): + del list_dir[0] + else: + ndir_deep = current_ndir * deep / 100 + ndir_path = current_ndir - ndir_deep + if (ndir_deep > 0): + list_dir.append([new_dir, ndir_deep]) + if (ndir_path > 0): + list_dir[0][1] = ndir_path + else: + del list_dir[0] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Wgeneration option. ') + parser.add_argument('--seed', type=int, action='store', help='Seed. ') + parser.add_argument('--ndir', type=int, action='store', + help='Number of directories for create. ') + parser.add_argument('--deep', type=int, action='store', + help='Percent for distribute deep.') + parser.add_argument('--nfile', type=int, action='store', + help='Number of files for create. ') + parser.add_argument('--size', type=int, action='store', + help='Total size of files (MB) ') + parser.add_argument('--distribute_files', type=int, action='store', + help='Percent for distribute files. ') + parser.add_argument('--distribute_size', type=int, action='store', + help='Percent to distribute size of files. ') + parser.add_argument('--path', action='store', help='Path to generate. ') + + results = parser.parse_args() + main(results) diff --git a/testing/run_benchmark.py b/testing/run_benchmark.py new file mode 100755 index 0000000..48daf3d --- /dev/null +++ b/testing/run_benchmark.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2014 Intra2net AG +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# + +# Authors: Victor Ramirez de la Corte +# Eduardo Robles Elvira + +import argparse +import os +import zlib +import fnmatch +import cProfile +import io +import pstats + + +def main(): + ''' + Main function, parses the command line arguments and launches the appropiate + benchmark test. + ''' + parser = argparse.ArgumentParser(description='Profiling test options. ') + parser.add_argument('-t', '--test', choices=['delta-tarfile', 'tarfile'], + help='Select option for testing. ') + parser.add_argument('-l', '--compression-level', type=int, default=9, + choices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + help='Select level of compression for zlib. ') + parser.add_argument('-p', '--path', + help='path to the file/dir to compress.') + parser.add_argument('-P', '--profile', action='store_true', + help='Enable profile') + parser.add_argument('-m', '--tarmode', default="w:gz", + help='Enable profile') + parser.add_argument( + '-s', '--sort', + help='Sort output profile', + choices=[ 'calls', 'cumulative', 'cumtime', 'file', 'path', 'module', + 'ncalls', 'pcalls', 'line', 'name', 'nfl', 'stdname', 'time', + 'tottime'], + default="cumulative") + + pargs = parser.parse_args() + compression_level = pargs.compression_level + path = pargs.path + + # start profiling + if pargs.profile: + pr = cProfile.Profile() + pr.enable() + + if pargs.test in ['delta-tarfile', 'tarfile']: + test_tarfile(pargs.test, compression_level, path, pargs.tarmode) + else: + parser.print_help() + + # end profiling + if pargs.profile: + pr.disable() + print_profile(pr, pargs.sort) + +def test_tarfile(res, compression_level=9, path='source_dir', tarmode="w:gz"): + ''' + Test that creates a tarfile called test_tarfile.tar.gz + ''' + if res == 'delta-tarfile': + deltatar = os.path.abspath("../deltatar") + import sys + sys.path.append("..") + from deltatar import tarfile as TarFile + elif res == 'tarfile': + import tarfile as TarFile + + print("creating a tarfile with mode = '%s'" % tarmode) + with open('test_tarfile.tar.gz', 'wb') as fo: + tar = TarFile.open(mode=tarmode, fileobj=fo, + compresslevel=compression_level) + tar.add(path) + +def print_profile(pr, sort): + s = io.StringIO() + if not sort: + sort = 'cumulative' + ps = pstats.Stats(pr, stream=s).sort_stats(sort) + ps.print_stats(20) + print(s.getvalue()) + + +if __name__ == '__main__': + main() diff --git a/wgeneration.py b/wgeneration.py deleted file mode 100755 index 8574119..0000000 --- a/wgeneration.py +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/python -import argparse -import os -import random -import string -from hashlib import sha512 as hashfunc - - -def create_rand_name(size=None): - """ Create rand name using alphanumeric char. - Param size: size of str name. - Type size: int. - Return: random name. - Rtype: str. - """ - if not size: - size = random.randint(5, 15) - name = '' - for x in range(size): - name += random.choice(string.hexdigits) - return name - - -def create_rand_folder(directory): - """ Create rand folder in directory. - Param directory: folder where create new rand folder. - Type directory: str. - Return: path with new directory. - Rtype: str. - """ - new_dir = os.path.join(directory, create_rand_name()) - if not os.path.exists(new_dir): - os.makedirs(new_dir) - else: - new_dir = create_rand_folder(directory) - return new_dir - - -def create_rand_cad(size=1024): - """ Create rand str using hexadecimal char and hash sha512. - Param size: size in byte of str for generate. - Type size: int. - Return: random name. - Rtype: str. - """ - cads = [hashfunc(create_rand_name()).hexdigest()] - last = cads[0] - for i in range(size / 128): - last = hashfunc(last[:8]).hexdigest() - cads.append(last) - return ''.join(cads)[:size] - - -def create_file(path, size): - """ Get rand name using alphanumeric char. - Param size: size of str name. - Type size: int. - Return: random name. - Rtype: str. - """ - filename = create_rand_name() - new_file = os.path.join(path, filename) - if not os.path.exists(new_file): - with open(new_file, 'w') as f: - f.write(create_rand_cad(size)) - else: - create_file(path, size) - - -def generate_list_sizes(nfile, size, distribute_size): - """ Generate list of size for use it with create_files. - Param nfile: . - Type nfile: . - Param size: . - Type size: . - Param distribute_size: . - Type distribute_size: . - Return: . - Rtype: . - """ - list_sizes = [] - aux_size = size - normal_size = int(size / nfile) - var = int(normal_size - normal_size * distribute_size / 100) - for s in range(nfile): - if aux_size <= 0: - list_sizes.append(0) - continue - tmp_size = random.randint(normal_size - var, normal_size + var) - aux_size -= tmp_size - if tmp_size < 0: - list_sizes.append(tmp_size + aux_size) - else: - list_sizes.append(tmp_size) - return list_sizes - - -def get_files_per_folder(nfile, ndir, distribute_files): - """ Get numbers of files for each folder. - Param nfile: . - Type nfile: . - Param ndir: . - Type ndir: . - Param distribute_files: . - Type distribute_files: . - Return: List of numbers. - Rtype: list(int). - """ - list_files = [] - aux_files = nfile - f_per_dir = int(nfile / ndir) - f_remainder = int(nfile % ndir) - for d in range(ndir): - if (aux_files <= 0): - list_files.append(0) - continue - files = 0 - if (f_per_dir > 0): - var = int(f_per_dir - f_per_dir * distribute_files / 100) - files += random.randint(f_per_dir - var, f_per_dir + var) - var = f_remainder - f_remainder * distribute_files / 100 - files += random.randint(f_remainder - var, f_remainder + var) - aux_files -= files - if (aux_files <= 0): - list_files.append(files + aux_files) - continue - list_files.append(files) - if (aux_files > 0): - list_files[-1] += aux_files - return list_files - - -def main(parser): - if parser.seed is not None: - seed = parser.seed - else: - seed = random.randint(0, 1000000000) - random.seed(seed) - if parser.ndir is not None: - ndir = parser.ndir - else: - ndir = random.randint(0, 50000) - if parser.deep is not None: - deep = parser.deep - else: - deep = random.randint(0, 100) - if parser.nfile: - nfile = parser.nfile - else: - nfile = random.randint(1, 500000) - if parser.size: - size = parser.size * 1024 * 1024 # MB to byte - else: - size = random.randint(1, 5000) - size = size * 1024 * 1024 # MB to byte - if parser.distribute_files is not None: - distribute_files = parser.distribute_files - else: - distribute_files = random.randint(0, 100) - if parser.distribute_size is not None: - distribute_size = parser.distribute_size - else: - distribute_size = random.randint(0, 100) - if parser.path: - path = os.path.abspath(parser.path) - if not os.path.exists(path): - os.makedirs(path) - else: - path = os.getcwd() - - print("Using seed %d: " % seed) - print("Generate %d folders with %d%% of deep." % (ndir, deep)) - print("Generate %d files with total size %d MB." % (nfile, - size / 1024 / 1024)) - print("\tDistribute (files, size): (%d%%, %d%%)." % (distribute_files, - distribute_size)) - - list_dir = [[path, ndir, nfile]] - list_sizes = generate_list_sizes(nfile, size, distribute_size) - list_files = get_files_per_folder(nfile, ndir, distribute_files) - - while ndir > 0: - new_dir = create_rand_folder(list_dir[0][0]) - ndir -= 1 - for i in range(list_files[0]): - create_file(new_dir, list_sizes[i]) - del list_sizes[:list_files[0]] - del list_files[0] - current_ndir = list_dir[0][1] - 1 - if (current_ndir == 0): - del list_dir[0] - else: - ndir_deep = current_ndir * deep / 100 - ndir_path = current_ndir - ndir_deep - if (ndir_deep > 0): - list_dir.append([new_dir, ndir_deep]) - if (ndir_path > 0): - list_dir[0][1] = ndir_path - else: - del list_dir[0] - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Wgeneration option. ') - parser.add_argument('--seed', type=int, action='store', help='Seed. ') - parser.add_argument('--ndir', type=int, action='store', - help='Number of directories for create. ') - parser.add_argument('--deep', type=int, action='store', - help='Percent for distribute deep.') - parser.add_argument('--nfile', type=int, action='store', - help='Number of files for create. ') - parser.add_argument('--size', type=int, action='store', - help='Total size of files (MB) ') - parser.add_argument('--distribute_files', type=int, action='store', - help='Percent for distribute files. ') - parser.add_argument('--distribute_size', type=int, action='store', - help='Percent to distribute size of files. ') - parser.add_argument('--path', action='store', help='Path to generate. ') - - results = parser.parse_args() - main(results) -- 1.7.1