added performance test script
authorChristian Herdtweck <christian.herdtweck@intra2net.com>
Wed, 15 Jun 2016 09:19:09 +0000 (11:19 +0200)
committerChristian Herdtweck <christian.herdtweck@intra2net.com>
Wed, 15 Jun 2016 11:18:03 +0000 (13:18 +0200)
testing/test_performance.py [new file with mode: 0644]

diff --git a/testing/test_performance.py b/testing/test_performance.py
new file mode 100644 (file)
index 0000000..0a7fbef
--- /dev/null
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+
+""" Test performance of compressed write
+
+Creates a single big volume with random files added
+
+.. codeauthor:: Intra2net <info@intra2net.com>
+"""
+
+from tempfile import NamedTemporaryFile
+from time import perf_counter
+import os
+
+# When executed as a script, put the parent of this file's directory at the
+# front of sys.path (presumably so that the `deltatar` package imported below
+# is picked up from there -- confirm against the repository layout).
+if __name__ == '__main__':
+    from os.path import dirname, abspath
+    import sys
+    # two dirname() calls: first gives this file's directory, second its parent
+    parent_dir = dirname(dirname(abspath(__file__)))
+    sys.path.insert(0, parent_dir)
+    print('pre-prended {} to sys path'.format(parent_dir))
+# NOTE: these imports intentionally come after the sys.path manipulation above
+import deltatar
+from deltatar.tarfile import TarFile
+
+from test_multivol_compression_sizes import find_random_files
+
+def main():
+    """ Main function, called when running file as script
+
+    see module doc for more info
+    """
+
+    mode = 'w#gz'
+    suffix = '.tgz'
+    #goal_size = 650*1e6 # 1 CD
+    goal_size = 2 * 1e9   # 2 GB (2/3 of space in my /tmp)
+    size_added = 0
+    size_tol = 32 * 1000 # 32k
+    min_size = 5
+    open_time = add_time = close_time = 0.0
+    n_files_added = 0
+
+    with NamedTemporaryFile(prefix='deltatar_multivol_cmp_tst_',
+                            suffix=suffix, delete=True) as file_obj:
+        file_obj.close()
+        result_file_name = file_obj.name
+        print('opening temp file ' + result_file_name)
+
+        start = perf_counter()
+        tarobj = TarFile.open(result_file_name, mode=mode)
+        end = perf_counter()
+        open_time = end - start
+
+        for add_file_name in find_random_files():
+            # check file
+            if add_file_name.startswith(result_file_name[:-6]):
+                continue    # do not accidentally add self
+            file_size = os.lstat(add_file_name).st_size
+            if file_size < min_size:
+                continue
+            if file_size + size_added > goal_size + size_tol:
+                continue    # new file is too big
+
+            # do add
+            start = perf_counter()
+            tarobj.add(add_file_name)
+            end  = perf_counter()
+            add_time += (end - start)
+
+            # update sizes and counts
+            size_added += file_size
+            n_files_added += 1
+            #print('added file of size {:9d}, {:9d} left (file name: {})'
+            #      .format(file_size, goal_size-size_added, add_file_name))
+            if n_files_added % 100 == 0:
+                print('added {:4d} files of overall size {:6.1f}MB, {:6.1f}MB '
+                      'left ({:4.1f}%); avg time to add per MB: {:.3f}s'
+                      .format(n_files_added, size_added/1.e6,
+                              (goal_size-size_added)/1.e6,
+                              size_added / goal_size * 100.,
+                              add_time/size_added*1.e6))
+
+            if size_added > goal_size - size_tol:
+                break
+
+        print('closing file')
+        start = perf_counter()
+        tarobj.close()
+        end = perf_counter()
+        close_time = end - start
+
+        result_size = os.stat(result_file_name).st_size
+
+    # summarize
+    print('time to open/close the tar file: {:.3f} / {:.3f}ms'
+          .format(open_time*1000., close_time*1000.))
+    print('time to add {} files: {:.3f}s (avg {:.3f}ms per file)'
+          .format(n_files_added, add_time, add_time / n_files_added * 1000.))
+    print('average added file size: {:.3f}KB'
+          .format(size_added/n_files_added/1000.))
+    print('time to add per MB: {:.3f}s'.format(add_time/size_added*1.0e6))
+    print('size of result file: {} --> compression ratio {:.1f}'
+          .format(result_size, size_added/result_size))
+
+
+# run the performance test only when this file is executed as a script
+if __name__ == '__main__':
+    main()