moved specification of expectations out of test() function; deal with RECORDSIZE...
authorChristian Herdtweck <christian.herdtweck@intra2net.com>
Mon, 18 Jul 2016 11:06:10 +0000 (13:06 +0200)
committerChristian Herdtweck <christian.herdtweck@intra2net.com>
Thu, 12 Nov 2020 14:04:34 +0000 (15:04 +0100)
testing/volume_size_accuracy.py

index 2c956f9..da5e73f 100755 (executable)
@@ -3,11 +3,11 @@
 """ Check very accurately the splitting of files into volumes; not a unit test
 
 Check:
-- behaviour for max_volume_sizes % BLOCK_SIZE != 0
+- behaviour for max_volume_sizes % BLOCKSIZE != 0
 - file sizes very close to size remaining in volume
 
 By doing the following:
-- create a multi-volume archive with max_volume_size % BLOCK_SIZE == 0
+- create a multi-volume archive with max_volume_size % BLOCKSIZE == 0
 - add a file that nearly fills the volume
 - add a small file that should just fit in or not
 - check expected number and size of volumes
@@ -17,58 +17,77 @@ Repeat with compressed/encrypted data; for this have to find some random data
 that is repeatable (--> :py:mod:`create_pseudo_random_files`) and experiment a
 bit for suitable seeds and sizes
 
-e.g. for max_volume_size = 8 * BLOCK_SIZE:
-
-block | 0     | 1     | 2     | 3     | 4     | 5     | 6     | 7     |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
-all fit into first volume
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Info1 | Dat10 |   0   |   0   |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
-
-file0 needs next block:
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Info1 | Dat10 | (ends early)
-vol1: |   0   |   0   |       |       |       |       |       |       |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
-
-file0 needs 2 more blocks
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | Info1 | Dat10 |
-vol1: |   0   |   0   |       |       |       |       |       |       |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
-
-file0 needs 3 more blocks
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | (ends early)
-vol1: | Dat05 | Info1 | Dat10 |   0   |   0   |       |       |       |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
-
-file0 regular, file1 needs next block:
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Info1 | Dat10 | (ends early)
-vol1: | Dat11 |   0   |   0   |       |       |       |       |       |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
-
-both need next block:
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Info1 | Dat10 | (ends early)
-vol1: | Dat11 |   0   |   0   |       |       |       |       |       |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
-
-single huge file
-vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | (ends early)
-vol1: | Dat05 | Dat06 | Dat07 | Dat08 | Dat09 | Dat0A | (ends early)
-vol2: |  ...  |       |       |       |       |       |       |       |
-      +-------+-------+-------+-------+-------+-------+-------+-------+
+e.g. for max_volume_size = RECORDSIZE + 1 * BLOCKSIZE
+
+File 0 has Info0 and blocks Dat00, Dat01, ... Dat0K, (Dat0L, Dat0M, Dat0N)
+File 1 has Info1 and blocks Dat10, (Dat11)
+                                                                end of   end of
+                                                                RECORD   volume
+block | 0     | 1     | 2     | ... | -5    | -4    | -3    | -2    | -1    |
+      +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+0: all fit into first record: blocks = [N_BLOCKS_PER_RECORD-5, 1]
+vol0: | Info0 | Dat00 | Dat01 | ... | Info1 | Dat10 |   0   |   0   |       |
+      +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+
+1: all fit into first volume: blocks = [MAX_VOLUME_BLOCKS-5, 1]
+vol0: | Info0 | Dat00 | Dat01 | ... | Dat0K | Info1 | Dat10 |   0   |   0   |
+      +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+
+2: file0 needs next block: blocks = [MAX_VOLUME_BLOCKS-4, 1]
+vol0: | Info0 | Dat00 | Dat01 | ... | Dat0K | Dat0L | Info1 | Dat10 |   0   | 0
+      +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+
+3: file0 needs 2 more blocks: blocks = [MAX_VOLUME_BLOCKS-3, 1]
+vol0: | Info0 | Dat00 | Dat01 | ... | Dat0K | Dat0L | Dat0M | Info1 | Dat10 |00
+      +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+
+4: file0 needs 3 more blocks: blocks = [MAX_VOLUME_BLOCKS-2, 1]
+vol0: | Info0 | Dat00 | Dat01 | ... | Dat0K | Dat0L | Dat0M |  wasted space |
+vol1: |  VOL  | Dat0N | Info1 | Dat10 | 00
+      +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+
+5: file0 regular, file1 needs next block: blocks = [MAX_VOLUME_BLOCKS-5, 2]
+vol0: | Info0 | Dat00 | Dat01 | ... | Dat0K | Info1 | Dat10 |       |       |
+vol1: |  VOL  | Dat11 |   0   |   0   |     |       |       |       |       |
+      +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+
+6: both need next block: blocks = [MAX_VOLUME_BLOCKS-4, 2]
+vol0: | Info0 | Dat00 | Dat01 | ... | Dat0K | Dat0L | Info1 | Dat10 |       |
+vol1: |  VOL  | Dat11 |   0   |   0   |     |       |       |       |       |
+      +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
+
+single huge file: blocks = [MAX_VOLUME_BLOCKS * 3,]
+vol0: | Info0 | Dat00 | Dat01 | ... | Dat02 | Dat03 | Dat04 |  wasted space |
+vol1: |  VOL  | Dat05 | Dat06 | Dat07 | ... | Dat08 | Dat09 |  wasted space |
+vol2: |  ...  |       |       |     |       |       |       |       |       |
+      +-------+-------+-------+ ... +-------+-------+-------+-------+-------+
 
 .. codeauthor:: Intra2net AG <info@intra2net>
 """
 
 
 import os
+from os.path import dirname, abspath
 import sys
 from math import ceil
 from glob import glob
 from tempfile import NamedTemporaryFile, TemporaryDirectory
-from deltatar.tarfile import TarFile, BLOCKSIZE
+
+# try to import the tarfile from source, not the globally installed one
+source_base = dirname(dirname(abspath(__file__)))
+print('adding {} to python path'.format(source_base))
+if os.path.isdir(source_base):
+    sys.path.insert(0, source_base)
+import inspect
+from deltatar.tarfile import TarFile, BLOCKSIZE, RECORDSIZE
+print('using TarFile from ' + dirname(inspect.getsourcefile(TarFile)))
+
+
+#: number of blocks in a record
+N_BLOCKS_PER_RECORD = RECORDSIZE // BLOCKSIZE
 
 #: number of blocks per tar volume file
-MAX_VOLUME_BLOCKS = 8
+MAX_VOLUME_BLOCKS = N_BLOCKS_PER_RECORD + 1
 
 
 def fill_file(file_handle, data_size):
@@ -84,136 +103,131 @@ def fill_file(file_handle, data_size):
 
 def new_volume_handler(tarobj, base_name, volume_number):
     """ called from tarobj when creating a new volume """
-    volume_path = "%s.%d" % (base_name, volume_number)
     tarobj.fileobj.close()
+    volume_path = "%s.%d" % (base_name, volume_number)
+    print('new-volume handler: creating volume {}'.format(volume_path))
     tarobj.open_volume(volume_path)
 
 
-def test(temp_dir, size0, size1, volume_size_offset=0):
+def size_str(size):
+    """ return string 'N (= b BLKS + m)' """
+    return '{} (= {} BLKs + {})'.format(size, *divmod(size, BLOCKSIZE))
+
+
+def test(temp_dir, file_blocks, volume_blocks_arg, offset_blocks,
+         file_size_offsets=(0, 0), volume_size_offset=0):
     """ create TarFile with given configuration """
 
-    if volume_size_offset < 0 or volume_size_offset > BLOCKSIZE-1:
+    if not (0 <= volume_size_offset < BLOCKSIZE):
         raise ValueError('volume size offset outside allowed interval '
                          '[0, BLOCKSIZE-1]: {}'.format(volume_size_offset))
-        # in this range, volume_size offset should not make a difference in
-        # expectations below
-    if size0 < size1:
-        raise ValueError('file0 must be larger than file1!')
-
-    # think about what we expect
-    n_blocks0 = ceil(size0 / BLOCKSIZE)   # number of blocks that file0 ...
-    n_blocks1 = ceil(size1 / BLOCKSIZE)   # ... and file1 will require in tar
-    if n_blocks0 + n_blocks1 < MAX_VOLUME_BLOCKS - 4:  # do not want to test...
-        raise ValueError('overall file sizes too small!')
-    elif n_blocks0 + n_blocks1 > MAX_VOLUME_BLOCKS + 1:    # ...these cases
-        raise ValueError('overall file sizes too big!')
-    elif n_blocks0 + n_blocks1 < MAX_VOLUME_BLOCKS - 3:
-        n_volumes = 1
-    else:
-        n_volumes = 2
-
-    expected_offset0 = 0
-    file0_complete_in_vol0 = n_blocks0 < MAX_VOLUME_BLOCKS - 2
-       # file 0 is completed in volume 0 --> file 1 starts in volume 0
-
-    if file0_complete_in_vol0:
-        expected_offset1 = (n_blocks0 + 1) * BLOCKSIZE
-    else:
-        # offset1 in volume1; 3 blocks in volume0 cannot be used by file0
-        expected_offset1 = ( n_blocks0 - (MAX_VOLUME_BLOCKS - 3) ) * BLOCKSIZE
-
-    # nubmer of blocks written overall: data blocks + 2*info + 2*0-blocks
-    n_blocks_overall = n_blocks0 + n_blocks1 + 2 + 2
+    for idx, size_offset in enumerate(file_size_offsets):
+        if not (0 <= size_offset < BLOCKSIZE):
+            raise ValueError('size offset for file {} outside allowed interval '
+                             '[0, BLOCKSIZE-1]: {}'
+                             .format(idx, size_offset))
+    if len(file_blocks) != len(file_size_offsets):
+        raise ValueError('need same number of file block sizes and offsets!')
 
-    if n_volumes == 0:
-        expected_sizes = [n_blocks_overall * BLOCKSIZE, ]
-    elif file0_complete_in_vol0:
-        expected_sizes = [(n_blocks0 + 2) * BLOCKSIZE,
-                          (n_blocks_overall-n_blocks0-2)*BLOCKSIZE]
-    else:
-        n_blocks_in_vol0 = MAX_VOLUME_BLOCKS-2
-        expected_sizes = [n_blocks_in_vol0 * BLOCKSIZE,
-                          (n_blocks_overall-n_blocks_in_vol0) * BLOCKSIZE]
-
-    # create TarFile with max volume size approx 8 blocks
     max_volume_size = MAX_VOLUME_BLOCKS * BLOCKSIZE  + volume_size_offset
     actual_sizes = []
+    actual_offsets = []
     volume_files = []
     tar_handle = None
     with NamedTemporaryFile(dir=temp_dir,
                             suffix='.tar',
+                            mode='wb',
                             delete=False) as tar_handle:
+        # create TarFile
         print('creating tar {} with max volume size {}'
-              .format(tar_handle.name, max_volume_size))
+              .format(tar_handle.name, size_str(max_volume_size)))
         tarobj = TarFile.open(mode='w:',
                               fileobj=tar_handle,
                               max_volume_size=max_volume_size,
                               new_volume_handler=new_volume_handler)
 
         # add files, remember offsets
-        real_offset0 = tarobj.offset
-
-        # create and add file0
-        add_handle = None
-        with NamedTemporaryFile(dir=temp_dir, delete=False) as add_handle:
-            fill_file(add_handle, size0)
-            add_handle.close()
-        print('adding file of size {0} ({1} = {2[0]} BLOCKS + {2[1]})'
-              .format(os.stat(add_handle.name).st_size, size0,
-                      divmod(size0, BLOCKSIZE)))
-        tarobj.add(add_handle.name, arcname='file0')
-        if add_handle:
-            os.unlink(add_handle.name)
-        real_offset1 = tarobj.offset
-
-        # create and add file1
-        add_handle = None
-        with NamedTemporaryFile(dir=temp_dir, delete=False) as add_handle:
-            fill_file(add_handle, size1)
-            add_handle.close()
-        print('adding file of size {0} ({1} = {2[0]} BLOCKS + {2[1]})'
-              .format(os.stat(add_handle.name).st_size, size1,
-                      divmod(size1, BLOCKSIZE)))
-        tarobj.add(add_handle.name, arcname='file1')
-        if add_handle:
-            os.unlink(add_handle.name)
+        for idx, (size_blocks, size_offset) \
+                in enumerate(zip(file_blocks, file_size_offsets)):
+
+            # remember offset
+            actual_offsets.append(tarobj.offset)
+
+            # create file
+            add_handle = None
+            file_size = size_blocks * BLOCKSIZE - size_offset
+            with NamedTemporaryFile(dir=temp_dir, delete=False) as add_handle:
+                fill_file(add_handle, file_size)
+                add_handle.close()
+            print('adding file of size {} at offset {}'
+                  .format(size_str(file_size), size_str(tarobj.offset)))
+
+            # add file
+            tarobj.add(add_handle.name, arcname='file{}'.format(idx))
+
+            # remove file
+            if add_handle:
+                os.unlink(add_handle.name)
+
+        # remember offset where 0s should be added
+        actual_offsets.append(tarobj.offset)
 
         # close tar file
+        print('before close: offset is ' + size_str(actual_offsets[-1]))
         tarobj.close()
+        tar_handle.close()
+        print('after close: offset is {}'.format(size_str(tarobj.offset)))
 
         # get volume file sizes
         volume_files = sorted(glob(tar_handle.name + "*"))
         for volume_file in volume_files:
-            actual_sizes.append(os.stat(volume_file).st_size)
+            actual_size = os.stat(volume_file).st_size
             print('found volume {} of size {}'
-                  .format(volume_file, actual_sizes[-1]))
+                  .format(volume_file, size_str(actual_size)))
+            actual_sizes.append(actual_size)
 
     for volume_file in volume_files:
         os.unlink(volume_file)
     # now all temp files should be deleted again
 
     # check expectation
-    found_err = False
-    if len(actual_sizes) != n_volumes:
-        found_err = True
+    everything_ok = True
+    if len(actual_offsets) != len(offset_blocks):
+        everything_ok = False
+        print('have {} actual offsets but expected {}!'
+              .format(len(actual_offsets), len(offset_blocks)))
+    for idx, (actual_offset, expected_block) \
+            in enumerate(zip(actual_offsets, offset_blocks)):
+        if actual_offset != expected_block * BLOCKSIZE:
+            everything_ok = False
+            print('wrong offset for file {}: {} != {}'
+                  .format(idx, size_str(actual_offset),
+                          size_str(expected_block*BLOCKSIZE)))
+
+    # last volume is filled up to RECORDSIZE
+    volume_blocks = list(volume_blocks_arg)
+    if (len(actual_sizes) == len(volume_blocks)-1) \
+            and (volume_blocks[-1] == 0):
+        actual_sizes.append(0)
+        volume_blocks[-2] = ceil(volume_blocks[-2] / N_BLOCKS_PER_RECORD) \
+                                                   * N_BLOCKS_PER_RECORD
+    elif len(actual_sizes) == len(volume_blocks):
+        volume_blocks[-1] = ceil(volume_blocks[-1] / N_BLOCKS_PER_RECORD) \
+                                                   * N_BLOCKS_PER_RECORD
+    else:
+        everything_ok = False
         print('wrong number of volumes: {} != {}'
-              .format(len(actual_sizes), n_volumes))
-    for vol_idx, (actual_size, expected_size) in \
-            enumerate(zip(actual_sizes, expected_sizes)):
-        if actual_size != expected_size:
-            found_err = True
+              .format(len(actual_sizes), len(volume_blocks)))
+
+    for idx, (actual_size, expected_blocks) in \
+            enumerate(zip(actual_sizes, volume_blocks)):
+        if actual_size != expected_blocks * BLOCKSIZE:
+            everything_ok = False
             print('wrong size for volume {}: {} != {}'
-                  .format(vol_idx, actual_size, expected_size))
-    if real_offset0 != expected_offset0:
-        found_err = True
-        print('wrong offset for file0: {} != {}'
-              .format(real_offset0, expected_offset0))
-    if real_offset1 != expected_offset1:
-        found_err = True
-        print('wrong offset for file1: {} != {}'
-              .format(real_offset1, expected_offset1))
+                  .format(idx, size_str(actual_size),
+                          size_str(expected_blocks * BLOCKSIZE)))
 
-    return ~found_err
+    return everything_ok
 
 
 def main():
@@ -222,11 +236,29 @@ def main():
     see module doc for more info
     """
 
+    N = N_BLOCKS_PER_RECORD
+    M = MAX_VOLUME_BLOCKS
+
+    # define tests by numbers of blocks:
+    # n_blocks file 0, 1; n_blocks vol0, 1, offset Info0, Info1, 0-blocks
+    tests = (((N-5, 1), (N,   0), (0, N-4, N-2)),
+             ((M-5, 1), (M,   0), (0, M-4, M-2)),
+             ((M-4, 1), (M+1, 0), (0, M-3, M-1)),
+             ((M-3, 1), (M+2, 0), (0, M-2, M)),
+             ((M-2, 1), (M-2, 6), (0, 2,   4)),
+             ((M-5, 2), (M-2, 4), (0, M-4, 2)),
+             ((M-4, 2), (M-1, 4), (0, M-3, 2)))
+
     n_errs = 0
     with TemporaryDirectory() as temp_dir:
-        test_succeeded = test(temp_dir, 3*BLOCKSIZE, 1*BLOCKSIZE)
-        if not test_succeeded:
-            n_errs += 1
+        for size_comb_idx, (file_sizes, vol_sizes, offsets) \
+                in enumerate(tests):
+            print('-' * 72)
+            print('size combination {}: ({}, {})'
+                  .format(size_comb_idx, *file_sizes))
+            test_succeeded = test(temp_dir, file_sizes, vol_sizes, offsets)
+            if not test_succeeded:
+                n_errs += 1
 
     return n_errs