#!/usr/bin/env python3
"""Generate a pseudo-random tree of folders filled with pseudo-random files."""
import argparse
import os
import random
import string
from hashlib import sha512 as hashfunc


def create_rand_name(size=None):
    """
    Create rand name using hexadecimal digit char (0-9, a-f, A-F).

    Param size: size of str name. A falsy value (None or 0) picks a
        random size between 5 and 15.
    Type size: int.
    Return: random name.
    Rtype: str.
    """
    if not size:
        size = random.randint(5, 15)
    # One random.choice call per character, in order, so a seeded run
    # consumes the random stream exactly once per character.
    return ''.join(random.choice(string.hexdigits) for _ in range(size))


def create_rand_folder(directory):
    """
    Create rand folder in directory.

    Param directory: folder where create new rand folder.
    Type directory: str.
    Return: path with new directory.
    Rtype: str.
    """
    # Keep drawing names until one does not collide with an existing entry.
    while True:
        new_dir = os.path.join(directory, create_rand_name())
        if not os.path.exists(new_dir):
            os.makedirs(new_dir)
            return new_dir


def create_rand_cad(size=1024):
    """
    Create rand str using hexadecimal char and hash sha512.

    The string is a chain of sha512 hex digests (128 chars each): the first
    digest hashes a random name, every following one hashes the first 8
    chars of the previous digest. The chain is then cut to `size` chars.

    Param size: size in chars of str for generate. Zero or negative
        gives an empty str.
    Type size: int.
    Return: random str of hexadecimal char.
    Rtype: str.
    """
    last = hashfunc(create_rand_name().encode('utf-8')).hexdigest()
    cads = [last]
    # 1 + size // 128 digests always hold at least `size` characters.
    for _ in range(size // 128):
        last = hashfunc(last[:8].encode('utf-8')).hexdigest()
        cads.append(last)
    # Clamp so a negative size cannot slice from the end of the chain.
    return ''.join(cads)[:max(size, 0)]


def create_file(path, size):
    """
    Create file with rand name and rand content in path.

    Param path: folder where create the new file.
    Type path: str.
    Param size: size in chars of the content written to the file.
    Type size: int.
    Return: path of the new file.
    Rtype: str.
    """
    # Keep drawing names until one does not collide with an existing entry.
    while True:
        new_file = os.path.join(path, create_rand_name())
        if not os.path.exists(new_file):
            break
    with open(new_file, 'w') as f:
        f.write(create_rand_cad(size))
    return new_file


def generate_list_sizes(nfile, size, distribute_size):
    """
    Generate list of size for use it with create_files.

    Every size is drawn around the mean size (size // nfile). The sizes are
    never negative and, for a positive `size`, add up to exactly `size`:
    a draw is cut to what is still available and any leftover goes to the
    last file.

    Param nfile: number of sizes to generate. Zero or negative gives
        an empty list.
    Type nfile: int.
    Param size: total size to split between the files.
    Type size: int.
    Param distribute_size: percent that narrows the spread around the
        mean: 100 gives the narrowest spread (+-1), 0 gives a spread as
        wide as the mean itself.
    Type distribute_size: int.
    Return: list with one size per file.
    Rtype: list(int).
    """
    if nfile <= 0:
        return []
    list_sizes = []
    remaining = size
    normal_size = size // nfile
    var = max(1, int(normal_size - normal_size * distribute_size / 100))
    for _ in range(nfile):
        if remaining <= 0:
            list_sizes.append(0)
            continue
        drawn = random.randint(normal_size - var, normal_size + var)
        # The draw may be negative (mean of 0) or larger than what is left.
        drawn = max(0, min(drawn, remaining))
        list_sizes.append(drawn)
        remaining -= drawn
    if remaining > 0:
        # Same policy as get_files_per_folder: the last entry takes the rest.
        list_sizes[-1] += remaining
    return list_sizes


def get_files_per_folder(nfile, ndir, distribute_files):
    """
    Get numbers of files for each folder.

    For a positive `nfile` the numbers add up to exactly `nfile`: a draw is
    cut to the files still available and any leftover goes to the last
    folder.

    Param nfile: total number of files to split between the folders.
    Type nfile: int.
    Param ndir: number of folders. Zero or negative gives an empty list.
    Type ndir: int.
    Param distribute_files: percent that narrows the spread around the
        mean number of files per folder (100 is the narrowest).
    Type distribute_files: int.
    Return: List of numbers.
    Rtype: list(int).
    """
    if ndir <= 0:
        return []
    list_files = []
    remaining = nfile
    f_per_dir, f_remainder = divmod(nfile, ndir)
    for _ in range(ndir):
        if remaining <= 0:
            list_files.append(0)
            continue
        files = 0
        if f_per_dir > 0:
            var = max(1, int(f_per_dir - f_per_dir * distribute_files / 100))
            files += random.randint(f_per_dir - var, f_per_dir + var)
        # A percent above 100 would make this spread negative and randint
        # would raise ValueError on the reversed range.
        var = max(0, f_remainder - f_remainder * distribute_files // 100)
        files += random.randint(f_remainder - var, f_remainder + var)
        files = max(0, min(files, remaining))
        list_files.append(files)
        remaining -= files
    if remaining > 0:
        list_files[-1] += remaining
    return list_files


def main(args=None):
    """
    Parse the options and generate the tree of folders and files.

    Every option that is not given is drawn at random, after seeding the
    random generator, so a run can be repeated by passing the printed seed
    together with the same options.

    Param args: command line arguments. A falsy value (None or an
        empty list) makes argparse read sys.argv.
    Type args: list(str).
    Return: seed used for the generation.
    Rtype: int.
    """
    parser = argparse.ArgumentParser(description='Wgeneration option. ')
    parser.add_argument('--seed', type=int, action='store',
                        help='Seed. ')
    parser.add_argument('--ndir', type=int, action='store',
                        help='Number of directories for create. ')
    parser.add_argument('--deep', type=int, action='store',
                        help='Percent for distribute deep.')
    parser.add_argument('--nfile', type=int, action='store',
                        help='Number of files for create. ')
    parser.add_argument('--size', type=int, action='store',
                        help='Total size of files (MB) ')
    parser.add_argument('--distribute_files', type=int, action='store',
                        help='Percent for distribute files. ')
    parser.add_argument('--distribute_size', type=int, action='store',
                        help='Percent to distribute size of files. ')
    parser.add_argument('--path', action='store',
                        help='Path to generate. ')

    parsed_args = parser.parse_args(args if args else None)

    if parsed_args.seed is not None:
        seed = parsed_args.seed
    else:
        seed = random.randint(0, 1000000000)
    random.seed(seed)

    # The order of the draws below is part of the seed contract: do not
    # reorder them.
    if parsed_args.ndir is not None:
        ndir = parsed_args.ndir
    else:
        ndir = random.randint(0, 50000)

    if parsed_args.deep is not None:
        deep = parsed_args.deep
    else:
        deep = random.randint(0, 100)

    # A value of 0 for --nfile or --size is treated as "not given".
    if parsed_args.nfile:
        nfile = parsed_args.nfile
    else:
        nfile = random.randint(1, 500000)

    if parsed_args.size:
        size = parsed_args.size * 1024 * 1024  # MB to byte
    else:
        size = random.randint(1, 5000)
        size = size * 1024 * 1024  # MB to byte

    if parsed_args.distribute_files is not None:
        distribute_files = parsed_args.distribute_files
    else:
        distribute_files = random.randint(0, 100)

    if parsed_args.distribute_size is not None:
        distribute_size = parsed_args.distribute_size
    else:
        distribute_size = random.randint(0, 100)

    if parsed_args.path:
        path = os.path.abspath(parsed_args.path)
        os.makedirs(path, exist_ok=True)
    else:
        path = os.getcwd()

    print("Using seed %d: " % seed)
    print("Generate %d folders with %d%% of deep." % (ndir, deep))
    print("Generate %d files with total size %d MB."
          % (nfile, size / 1024 / 1024))
    print("\tDistribute (files, size): (%d%%, %d%%)."
          % (distribute_files, distribute_size))

    list_sizes = generate_list_sizes(nfile, size, distribute_size)
    list_files = get_files_per_folder(nfile, ndir, distribute_files)

    # Queue of [parent folder, budget of folders still to create under it].
    # Budgets become fractional because `deep` is applied with true division.
    list_dir = [[path, ndir]]
    # Index of the first size not used yet; avoids deleting list prefixes.
    cursor = 0
    # list_files holds one entry per folder, so this creates `ndir` folders.
    for file_count in list_files:
        new_dir = create_rand_folder(list_dir[0][0])
        for file_size in list_sizes[cursor:cursor + file_count]:
            create_file(new_dir, file_size)
        cursor += file_count

        current_ndir = list_dir[0][1] - 1
        if current_ndir == 0:
            del list_dir[0]
            continue
        # Split what is left of the budget: `deep` percent goes below the
        # new folder, the rest stays with the current parent.
        ndir_deep = current_ndir * deep / 100
        ndir_path = current_ndir - ndir_deep
        if ndir_deep > 0:
            list_dir.append([new_dir, ndir_deep])
        if ndir_path > 0:
            list_dir[0][1] = ndir_path
        else:
            del list_dir[0]
    return seed


if __name__ == "__main__":
    main()