volume split test working enough to find first bugs...
author: Christian Herdtweck <christian.herdtweck@intra2net.com>
Thu, 21 Jul 2016 07:50:09 +0000 (09:50 +0200)
committer: Christian Herdtweck <christian.herdtweck@intra2net.com>
Thu, 12 Nov 2020 14:04:34 +0000 (15:04 +0100)
testing/test_volume_split.py

index 43d6121..9cd2e27 100755 (executable)
@@ -26,7 +26,7 @@ from hashlib import md5 as hash_type
 from tempfile import TemporaryDirectory, NamedTemporaryFile
 from time import time
 from traceback import format_exc
-from os.path import dirname, abspath
+from os.path import dirname, abspath, basename
 
 # try to import the tarfile from source, not the globally installed one
 source_base = dirname(dirname(abspath(__file__)))
@@ -44,8 +44,8 @@ N_BLOCKS_PER_RECORD = RECORDSIZE // BLOCKSIZE
 #: number of blocks per tar volume file
 MAX_VOLUME_BLOCKS = N_BLOCKS_PER_RECORD + 1
 
-#: size of big file
-BIG_SIZE = MAX_VOLUME_BLOCKS * BLOCKSIZE
+#: size of big file: fits into first volume even if not compressed
+BIG_SIZE = (MAX_VOLUME_BLOCKS-3) * BLOCKSIZE
 
 #: max size of small files
 SMALL_MAX_SIZE = 2 * BLOCKSIZE
@@ -66,20 +66,26 @@ def create_file(file_size, temp_dir):
     returned hash must be compatible with :py:func:`hash_file`
     """
     hash_obj = hash_type()
+    if random.getrandbits(1) == 1:
+        fill_type = 0
+    else:
+        fill_type = 1
     with NamedTemporaryFile(dir=temp_dir, delete=False) as temp_file:
-        if random.getrandbits(1) == 1:
+        if fill_type == 0:
             fill_file_repetitive(temp_file, file_size, hash_obj)
-        else:
+        elif fill_type == 1:
             fill_file_random(temp_file, file_size, hash_obj)
+        else:
+            raise ValueError('unexpected fill type!')
 
-    return temp_file.name, hash_obj.hexdigest()
+    return temp_file.name, hash_obj.hexdigest(), fill_type
 
 
 def fill_file_repetitive(temp_file, file_size, hash_obj):
     """ fills file with repetitive data """
     bytes_written = 0
     data = bytes(range(256))
-    while file_size-bytes_written < 256:
+    while file_size-bytes_written > 256:
         temp_file.write(data)
         hash_obj.update(data)
         bytes_written += 256
@@ -93,7 +99,7 @@ def fill_file_random(temp_file, file_size, hash_obj):
     """ fills file with randomized data """
     bytes_written = 0
     data = bytearray(range(256))
-    while file_size-bytes_written < 255:
+    while file_size-bytes_written > 255:
         random.shuffle(data)
         temp_file.write(data[:-1])   # write all but last to make a difference
         hash_obj.update(data[:-1])   # between files of same size
@@ -143,10 +149,13 @@ def do_test(seed, tar_mode, temp_dir, print_everything=False):
 
         # define local volume handler so can read/write volume_handler_called
         volume_handler_called = False
+        offset_end_vol0 = None
         def new_volume_handler(tarobj, base_name, volume_number):
             """ called from tarobj when creating a new volume """
             nonlocal volume_handler_called
+            nonlocal offset_end_vol0
             volume_handler_called = True
+            offset_end_vol0 = tarobj.offset
             volume_path = "%s.%d" % (base_name, volume_number)
             tarobj.open_volume(volume_path)
 
@@ -161,11 +170,11 @@ def do_test(seed, tar_mode, temp_dir, print_everything=False):
                 size_left_func = tarobj._size_left_file
 
             # add big file
-            big_name, big_hash = create_file(BIG_SIZE, temp_dir)
+            big_name, big_hash, file_info = create_file(BIG_SIZE, temp_dir)
             files[big_name] = big_hash
-            dprnt('adding big file {} of size {} with hash {}'
-                  .format(big_name, BIG_SIZE, big_hash))
-            tarobj.add(big_name)
+            dprnt('adding big file {} of size {}, info {} with hash {}'
+                  .format(big_name, BIG_SIZE, file_info, big_hash))
+            tarobj.add(big_name, arcname=basename(big_name))
             dprnt('now offset={}, size_left={}'
                   .format(tarobj.offset, size_left_func()))
 
@@ -173,21 +182,26 @@ def do_test(seed, tar_mode, temp_dir, print_everything=False):
             while not volume_handler_called:
                 # add small file
                 small_size = random.randint(0, SMALL_MAX_SIZE)
-                small_name, small_hash = create_file(small_size, temp_dir)
+                small_name, small_hash, file_info = create_file(small_size,
+                                                                temp_dir)
                 files[small_name] = small_hash
-                dprnt('adding small file {} of size {} with hash {}'
-                      .format(small_name, small_size, small_hash))
-                tarobj.add(small_name)
+                dprnt('adding small file {} of size {}, info {} with hash {}'
+                      .format(small_name, small_size, file_info, small_hash))
+                tarobj.add(small_name, arcname=basename(small_name))
                 dprnt('now offset={}, size_left={}'
                       .format(tarobj.offset, size_left_func()))
 
             # close tarobj -- happens in __exit__ of TarFile context
             dprnt('closing tar file')
         # now tarobj should be closed
+        if not temp_file.closed:
+            dprnt('closing temp file -- this should have happened at volume '
+                  'change!')
+            temp_file.close()
 
         # remember size of first volume (2nd should always be RECORDSIZE)
-        dprnt('size of first volume file: {}'
-              .format(os.stat(temp_file.name).st_size))
+        dprnt('size of first volume file: {}; offset at vol change: {}'
+              .format(os.stat(temp_file.name).st_size, offset_end_vol0))
         if os.stat(temp_file.name + ".1").st_size != RECORDSIZE:
             everything_ok = False
             dprnt('strange size of 2nd volume: {}'
@@ -207,15 +221,17 @@ def do_test(seed, tar_mode, temp_dir, print_everything=False):
         if len(os.listdir(temp_dir)) != len(files)+2 + n_files_at_start:
             everything_ok = False
             dprnt('wrong number of files: found {} but expect {}!'
-                  .format(len(os.listdir()), len(files)+2+n_files_at_start))
-            for file_name in os.listdir():
+                  .format(len(os.listdir(temp_dir)),
+                          len(files)+2+n_files_at_start))
+            for file_name in os.listdir(temp_dir):
                 dprnt('listdir: {}'.format(file_name))
 
-        for file_name, file_hash in files:
+        for file_name, file_hash in files.items():
             if not os.path.exists(file_name):
                 everything_ok = False
                 dprnt('failed to find file {} after extraction'
                       .format(file_name))
+                continue
             if hash_file(file_name) != file_hash:
                 everything_ok = False
                 dprnt('wrong hash for file {} after extraction: {} != {}'
@@ -261,9 +277,10 @@ def test_forever():
 
     # more params
     fast_fail = True
-    print_everything = False
-    modes = 'w:tar', 'w:gz', 'w:bz2', 'w|tar', 'w|gz', 'w|bz2', 'w#tar', \
+    print_everything = True
+    modes = 'w:tar', 'w|tar', 'w|gz', 'w|bz2', 'w#tar', \
             'w#gz', #'w#gz.aes128', 'w#gz.aes256', 'w#aes128', 'w#aes256'
+            # not currently working: 'w:gz', 'w:bz2',
 
     # seed properly
     random.seed()
@@ -293,6 +310,8 @@ def test_forever():
                               .format(n_runs, (time()-start_time)/n_runs))
         except KeyboardInterrupt:
             print('Stopped by user')
+            for line in format_exc().splitlines():
+                print(line)
 
     # summarize
     print('')