From 283153fb1645ba452aaed32b7c7847fbbce35e5a Mon Sep 17 00:00:00 2001
From: Christian Herdtweck <christian.herdtweck@intra2net.com>
Date: Thu, 21 Jul 2016 18:15:12 +0200
Subject: [PATCH] continue testing of volume split with compressed archives; may have found another TarFile bug

---
 testing/test_volume_split.py |  125 ++++++++++++++++++++++++++++++-----------
 1 files changed, 91 insertions(+), 34 deletions(-)

diff --git a/testing/test_volume_split.py b/testing/test_volume_split.py
index 9cd2e27..85d394a 100755
--- a/testing/test_volume_split.py
+++ b/testing/test_volume_split.py
@@ -19,6 +19,7 @@ More precisely:
 """
 
 import os
+from os.path import dirname, abspath, basename
 import sys
 import random
 from math import log2
@@ -26,7 +27,6 @@ from hashlib import md5 as hash_type
 from tempfile import TemporaryDirectory, NamedTemporaryFile
 from time import time
 from traceback import format_exc
-from os.path import dirname, abspath, basename
 
 # try to import the tarfile from source, not the globally installed one
 source_base = dirname(dirname(abspath(__file__)))
@@ -50,6 +50,9 @@ BIG_SIZE = (MAX_VOLUME_BLOCKS-3) * BLOCKSIZE
 #: max size of small files
 SMALL_MAX_SIZE = 2 * BLOCKSIZE
 
+#: max number of small files to add while waiting for the volume split
+SMALL_MAX_NUMBER = 200
+
 #: number of bits used for seeding
 SEED_BITS = int(log2(sys.maxsize+1))
 
@@ -123,17 +126,26 @@ def hash_file(file_name):
     return hash_obj.hexdigest()
 
 
-def do_test(seed, tar_mode, temp_dir, print_everything=False):
+def do_test(seed, create_mode, extract_mode, temp_dir, print_everything=False):
     """ a single test run; returns True if everything went ok """
 
     # output is not printed but remembered and only printed in the end
     # if necessary
     output = []
-    dprnt = print
+    if print_everything:
+        print('-' * 72)
+        prefix = '{:9d}: '.format(seed)
+        dprnt = lambda val: print(prefix + val)
+    else:
+        dprnt = output.append
+        dprnt('-' * 72)
     everything_ok = False
 
+    # record params
+    dprnt('using seed {}, mode {} for create and {} for extract'
+          .format(seed, create_mode, extract_mode))
+
     # seed random number generator
-    dprnt('using seed {}'.format(seed))
     random.seed(seed)
 
     # remember number of files in temp dir
@@ -142,25 +154,33 @@ def do_test(seed, tar_mode, temp_dir, print_everything=False):
     # create tar archive
     temp_file = None
     try:
-        everything_ok = True
-        temp_file = NamedTemporaryFile(dir=temp_dir, suffix='.' + tar_mode[2:],
+        temp_file = NamedTemporaryFile(dir=temp_dir,
+                                       suffix='.' + create_mode[2:],
                                        delete=False, mode='wb')
+
+        # preparations
+        everything_ok = True
         files = {}
+        if print_everything:
+            tar_debug = 3
+        else:
+            tar_debug = None
 
         # define local volume handler so can read/write volume_handler_called
         volume_handler_called = False
-        offset_end_vol0 = None
         def new_volume_handler(tarobj, base_name, volume_number):
             """ called from tarobj when creating a new volume """
             nonlocal volume_handler_called
-            nonlocal offset_end_vol0
             volume_handler_called = True
-            offset_end_vol0 = tarobj.offset
-            volume_path = "%s.%d" % (base_name, volume_number)
+            volume_path = "%s.%d" % (temp_file.name, volume_number)
+            dprnt('in volume handler, at offset {}: open volume {}'
+                  .format(tarobj.offset, volume_path))
             tarobj.open_volume(volume_path)
+            dprnt('in volume handler, after open_volume: offset is {}'
+                  .format(tarobj.offset))
 
         dprnt('creating archive {}'.format(temp_file.name))
-        with TarFile.open(mode=tar_mode, fileobj=temp_file,
+        with TarFile.open(mode=create_mode, fileobj=temp_file, debug=tar_debug,
                           max_volume_size=MAX_VOLUME_BLOCKS * BLOCKSIZE,
                           new_volume_handler=new_volume_handler) as tarobj:
 
@@ -180,6 +200,12 @@ def do_test(seed, tar_mode, temp_dir, print_everything=False):
 
             # loop
             while not volume_handler_called:
+                if len(files) > SMALL_MAX_NUMBER:
+                    everything_ok = False
+                    dprnt('reached max number {} of files in archive'
+                          .format(len(files)))
+                    break
+
                 # add small file
                 small_size = random.randint(0, SMALL_MAX_SIZE)
                 small_name, small_hash, file_info = create_file(small_size,
@@ -200,8 +226,8 @@ def do_test(seed, tar_mode, temp_dir, print_everything=False):
             temp_file.close()
 
         # remember size of first volume (2nd should always be RECORDSIZE)
-        dprnt('size of first volume file: {}; offset at vol change: {}'
-              .format(os.stat(temp_file.name).st_size, offset_end_vol0))
+        dprnt('size of first volume file: {}'
+              .format(os.stat(temp_file.name).st_size))
         if os.stat(temp_file.name + ".1").st_size != RECORDSIZE:
             everything_ok = False
             dprnt('strange size of 2nd volume: {}'
@@ -213,7 +239,8 @@ def do_test(seed, tar_mode, temp_dir, print_everything=False):
             os.unlink(file_name)
 
         # extract
-        with TarFile.open(mode='r' + tar_mode[1:], name=temp_file.name,
+        with TarFile.open(mode=extract_mode, name=temp_file.name,
+                          debug=tar_debug,
                           new_volume_handler=new_volume_handler) as tarobj:
             tarobj.extractall(path=temp_dir)
 
@@ -252,10 +279,12 @@ def do_test(seed, tar_mode, temp_dir, print_everything=False):
             except FileNotFoundError:
                 pass
 
-    if print_everything or not everything_ok:
+    if (not print_everything) and (not everything_ok):
         prefix = '{:9d}: '.format(seed)
         for line in output:
             print(prefix + line)
+    elif print_everything and everything_ok:
+        dprnt('ended successfully')
 
     return everything_ok
 
@@ -278,16 +307,17 @@ def test_forever():
     # more params
     fast_fail = True
     print_everything = True
-    modes = 'w:tar', 'w|tar', 'w|gz', 'w|bz2', 'w#tar', \
-            'w#gz', #'w#gz.aes128', 'w#gz.aes256', 'w#aes128', 'w#aes256'
-            # not currently working: 'w:gz', 'w:bz2',
+    create_modes = 'w:tar', 'w|tar', 'w|gz', 'w|bz2', 'w#gz'
+                   #'w#gz.aes128', 'w#gz.aes256', 'w#aes128', 'w#aes256'
+                   # not currently working: 'w:gz', 'w:bz2',
+    extract_mode_starts = 'r:', 'r#', 'r:*'
 
     # seed properly
     random.seed()
 
     # preparations
     n_runs = 0
-    error_seeds = []
+    error_params = []
     do_stop = False
 
     # create temp dir
@@ -296,18 +326,43 @@ def test_forever():
         try:
             start_time = time()
             while not do_stop:
-                for mode in modes:
-                    seed = create_seed()
-                    if not do_test(seed, mode, temp_dir):
-                        error_seeds.append(seed)
-                        if fast_fail:
-                            print('stopping because fast_fail is set')
-                            do_stop = True
+                for create_mode in create_modes:
+                    if do_stop:
+                        break
+                    for extract_start in extract_mode_starts:
+                        if do_stop:
                             break
-                    n_runs += 1
-                    if n_runs % 100 == 0:
-                        print('at run {} ({:.3f}s per run)'
-                              .format(n_runs, (time()-start_time)/n_runs))
+
+                        # figure out extract mode for tar file
+                        if ('#' in extract_start) and ('#' not in create_mode):
+                            continue    # not possible
+                        full_extract_mode = extract_start
+                        if extract_start[-1] != '*':
+                            full_extract_mode += create_mode[2:]
+
+                        # create seed to re-create results
+                        seed = create_seed()
+
+                        # run test
+                        n_runs += 1
+                        everything_ok = \
+                            do_test(seed, create_mode, full_extract_mode,
+                                    temp_dir,
+                                    print_everything=print_everything)
+
+                        # remember error
+                        if not everything_ok:
+                            error_params.append((seed, create_mode,
+                                                 full_extract_mode))
+                            if fast_fail:
+                                print('stopping because fast_fail is set')
+                                do_stop = True
+                                break
+
+                        # print some output from time to time
+                        if n_runs % 100 == 0:
+                            print('at run {} ({:.3f}s per run)'
+                                  .format(n_runs, (time()-start_time)/n_runs))
         except KeyboardInterrupt:
             print('Stopped by user')
             for line in format_exc().splitlines():
@@ -315,17 +370,19 @@ def test_forever():
 
     # summarize
     print('')
-    print('-'*72)
-    n_errs = len(error_seeds)
+    print('='*72)
+    n_errs = len(error_params)
     duration = time() - start_time
     if n_runs == 0:
         print('summary: no test run has finished')
     else:
         print('summary: {} runs, in {}s ({:.3f}s per run); '
-              '{} with errs ({:.2f}%)'
+              '{} with errs ({:.0f}%)'
               .format(n_runs, duration, duration/n_runs, n_errs,
                       100.0 * float(n_errs)/float(n_runs)))
-        print('seeds that created errors: {}'.format(error_seeds))
+        print('params that created errors')
+        for params in error_params:
+            print(params)
 
 
 if __name__ == '__main__':
-- 
1.7.1