implemented volume size accuracy test before realizing RECORDSIZE
author: Christian Herdtweck <christian.herdtweck@intra2net.com>
Fri, 15 Jul 2016 10:43:16 +0000 (12:43 +0200)
committer: Christian Herdtweck <christian.herdtweck@intra2net.com>
Thu, 12 Nov 2020 14:04:34 +0000 (15:04 +0100)
testing/volume_size_accuracy.py [changed mode: 0644->0755]

old mode 100644 (file)
new mode 100755 (executable)
index 2e8e5a4..2c956f9
@@ -12,35 +12,208 @@ By doing the following:
 - add a small file that should just fit in or not
 - check expected number and size of volumes
 - repeat with max_volume_size +1, -1, +2, -2, +10, -10
+
 Repeat with compressed/encrypted data; for this have to find some random data
 that is repeatable (--> :py:mod:`create_pseudo_random_files`) and experiment a
 bit for suitable seeds and sizes
 
 e.g. for max_volume_size = 8 * BLOCK_SIZE:
 
-block | 0      | 1     | 2     | 3     | 4     | 5     | 6     | 7     | (8!)
-------+--------+-------+-------+-------+-------+-------+-------+-------+------
-file0 fits into 5 blocks:
-vol0: | Info0  | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | Info1 | Dat10 |
-vol1: | Dat11  |       |       |       |       |       |       |       |
-
-OR: file0 needs 6th block --> force volume0 too big:
-vol0: | Info0  | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | Dat05 | Info1 | Dat10
-vol1: | Dat11  |       |       |       |       |       |       |       |
-
-OR: all fit into first volume
-vol0: | Info0  | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | Info1 | Dat10 |
-
-NOT: Info block in the end
-vol0: | Info0  | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | Dat05 | Info1 |
-vol1: | Dat10  | Dat11 |       |       |       |       |       |       |
+block | 0     | 1     | 2     | 3     | 4     | 5     | 6     | 7     |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
+all fit into first volume
+vol0: | Info0 | Dat00 | Dat01 | Dat02 | Info1 | Dat10 |   0   |   0   |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
+
+file0 needs next block:
+vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Info1 | Dat10 | (ends early)
+vol1: |   0   |   0   |       |       |       |       |       |       |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
+
+file0 needs 2 more blocks
+vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | Info1 | Dat10 |
+vol1: |   0   |   0   |       |       |       |       |       |       |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
+
+file0 needs 3 more blocks
+vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | (ends early)
+vol1: | Dat05 | Info1 | Dat10 |   0   |   0   |       |       |       |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
+
+file0 regular, file1 needs next block:
+vol0: | Info0 | Dat00 | Dat01 | Dat02 | Info1 | Dat10 | (ends early)
+vol1: | Dat11 |   0   |   0   |       |       |       |       |       |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
+
+both need next block:
+vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Info1 | Dat10 | (ends early)
+vol1: | Dat11 |   0   |   0   |       |       |       |       |       |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
+
+single huge file
+vol0: | Info0 | Dat00 | Dat01 | Dat02 | Dat03 | Dat04 | (ends early)
+vol1: | Dat05 | Dat06 | Dat07 | Dat08 | Dat09 | Dat0A | (ends early)
+vol2: |  ...  |       |       |       |       |       |       |       |
+      +-------+-------+-------+-------+-------+-------+-------+-------+
 
 .. codeauthor:: Intra2net AG <info@intra2net>
 """
 
 
-from deltatar.tarfile import TarFile
+import os
+import sys
+from math import ceil
+from glob import glob
+from tempfile import NamedTemporaryFile, TemporaryDirectory
+from deltatar.tarfile import TarFile, BLOCKSIZE
+
+#: number of blocks per tar volume file
+MAX_VOLUME_BLOCKS = 8
+
+
+def fill_file(file_handle, data_size):
+    """ write data_size bytes of a repeating 0..255 byte pattern to file
+
+    :param file_handle: object with a `write` method that accepts bytes
+    :param int data_size: number of bytes to write
+    """
+    pattern = bytes(range(2**8))
+    pattern_len = len(pattern)
+
+    # number of complete copies of the pattern that fit into data_size
+    n_complete = max(data_size // pattern_len, 0)
+    for _ in range(n_complete):
+        file_handle.write(pattern)
+
+    # remainder: start of pattern (possibly empty, write is called anyway)
+    file_handle.write(pattern[:data_size - n_complete * pattern_len])
+
+
+def new_volume_handler(tarobj, base_name, volume_number):
+    """ callback for tarobj: close current volume file, open the next one
+
+    The next volume is named `base_name` + "." + `volume_number`.
+
+    :param tarobj: tar object; its `fileobj` is closed and its `open_volume`
+                   is called with the new volume's path
+    :param base_name: path prefix shared by all volume files
+    :param int volume_number: number of the volume to open
+    """
+    next_path = "%s.%d" % (base_name, volume_number)
+    finished_volume = tarobj.fileobj
+    finished_volume.close()
+    tarobj.open_volume(next_path)
+
+
+def _add_temp_file(tarobj, temp_dir, size, arcname):
+    """ create temp file of given size, add it to tarobj, delete it again
+
+    :param tarobj: open tar object to add the file to
+    :param str temp_dir: directory in which the temp file is created
+    :param int size: number of bytes of data written to the temp file
+    :param str arcname: name under which the file is added to the archive
+    """
+    with NamedTemporaryFile(dir=temp_dir, delete=False) as add_handle:
+        fill_file(add_handle, size)
+    # leaving the with-block closed the handle; delete=False kept the file
+    try:
+        print('adding file of size {0} ({1} = {2[0]} BLOCKS + {2[1]})'
+              .format(os.stat(add_handle.name).st_size, size,
+                      divmod(size, BLOCKSIZE)))
+        tarobj.add(add_handle.name, arcname=arcname)
+    finally:
+        # remove temp file even if adding it to the archive failed
+        os.unlink(add_handle.name)
+
+
+def test(temp_dir, size0, size1, volume_size_offset=0):
+    """ create TarFile with given configuration, compare with expectations
+
+    Creates an uncompressed multi-volume tar (mode `w:`) in temp_dir with max
+    volume size `MAX_VOLUME_BLOCKS * BLOCKSIZE + volume_size_offset`, adds two
+    temp files of size0 and size1 bytes as `file0` and `file1` and then
+    compares number and sizes of the created volume files as well as the value
+    of `tarobj.offset` before adding each file with what is expected.
+    Mismatches are printed. Volume files are deleted again in the end.
+
+    :param str temp_dir: directory for all temporary files
+    :param int size0: size of first file in bytes; must not be < size1
+    :param int size1: size of second file in bytes
+    :param int volume_size_offset: added to max volume size; must be in
+                                   [0, BLOCKSIZE-1]
+    :returns: True if all expectations were met, False otherwise
+    :raises ValueError: if volume_size_offset is outside its interval, if
+                        size0 < size1 or if the files need fewer than
+                        MAX_VOLUME_BLOCKS - 4 or more than
+                        MAX_VOLUME_BLOCKS + 1 data blocks overall
+    """
+
+    # in this range, volume_size offset should not make a difference in
+    # expectations below
+    if volume_size_offset < 0 or volume_size_offset > BLOCKSIZE-1:
+        raise ValueError('volume size offset outside allowed interval '
+                         '[0, BLOCKSIZE-1]: {}'.format(volume_size_offset))
+    if size0 < size1:
+        raise ValueError('file0 must be larger than file1!')
+
+    # think about what we expect
+    n_blocks0 = ceil(size0 / BLOCKSIZE)   # number of blocks that file0 ...
+    n_blocks1 = ceil(size1 / BLOCKSIZE)   # ... and file1 will require in tar
+    n_data_blocks = n_blocks0 + n_blocks1
+    if n_data_blocks < MAX_VOLUME_BLOCKS - 4:      # do not want to test...
+        raise ValueError('overall file sizes too small!')
+    elif n_data_blocks > MAX_VOLUME_BLOCKS + 1:    # ...these cases
+        raise ValueError('overall file sizes too big!')
+    elif n_data_blocks < MAX_VOLUME_BLOCKS - 3:
+        n_volumes = 1
+    else:
+        n_volumes = 2
+
+    expected_offset0 = 0
+    # file 0 is completed in volume 0 --> file 1 starts in volume 0
+    file0_complete_in_vol0 = n_blocks0 < MAX_VOLUME_BLOCKS - 2
+
+    if file0_complete_in_vol0:
+        expected_offset1 = (n_blocks0 + 1) * BLOCKSIZE
+    else:
+        # offset1 in volume1; 3 blocks in volume0 cannot be used by file0
+        expected_offset1 = (n_blocks0 - (MAX_VOLUME_BLOCKS - 3)) * BLOCKSIZE
+
+    # number of blocks written overall: data blocks + 2*info + 2*0-blocks
+    # NOTE(review): this assumes an archive ends with exactly 2 zero blocks
+    # and no further padding (e.g. to a record size) -- confirm
+    n_blocks_overall = n_data_blocks + 2 + 2
+
+    if n_volumes == 1:
+        # everything ends up in the one and only volume
+        expected_sizes = [n_blocks_overall * BLOCKSIZE, ]
+    elif file0_complete_in_vol0:
+        expected_sizes = [(n_blocks0 + 2) * BLOCKSIZE,
+                          (n_blocks_overall-n_blocks0-2)*BLOCKSIZE]
+    else:
+        n_blocks_in_vol0 = MAX_VOLUME_BLOCKS-2
+        expected_sizes = [n_blocks_in_vol0 * BLOCKSIZE,
+                          (n_blocks_overall-n_blocks_in_vol0) * BLOCKSIZE]
+
+    # create TarFile with max volume size approx 8 blocks
+    max_volume_size = MAX_VOLUME_BLOCKS * BLOCKSIZE + volume_size_offset
+    actual_sizes = []
+    with NamedTemporaryFile(dir=temp_dir,
+                            suffix='.tar',
+                            delete=False) as tar_handle:
+        print('creating tar {} with max volume size {}'
+              .format(tar_handle.name, max_volume_size))
+        tarobj = TarFile.open(mode='w:',
+                              fileobj=tar_handle,
+                              max_volume_size=max_volume_size,
+                              new_volume_handler=new_volume_handler)
+
+        # add files, remember offsets
+        real_offset0 = tarobj.offset
+        _add_temp_file(tarobj, temp_dir, size0, 'file0')
+        real_offset1 = tarobj.offset
+        _add_temp_file(tarobj, temp_dir, size1, 'file1')
+
+        # close tar file
+        tarobj.close()
+
+        # get volume file sizes; further volumes are named like the first
+        # one plus a suffix (see new_volume_handler), so glob finds them all
+        volume_files = sorted(glob(tar_handle.name + "*"))
+        for volume_file in volume_files:
+            actual_sizes.append(os.stat(volume_file).st_size)
+            print('found volume {} of size {}'
+                  .format(volume_file, actual_sizes[-1]))
+
+    for volume_file in volume_files:
+        os.unlink(volume_file)
+    # now all temp files should be deleted again
+
+    # check expectation
+    found_err = False
+    if len(actual_sizes) != n_volumes:
+        found_err = True
+        print('wrong number of volumes: {} != {}'
+              .format(len(actual_sizes), n_volumes))
+    for vol_idx, (actual_size, expected_size) in \
+            enumerate(zip(actual_sizes, expected_sizes)):
+        if actual_size != expected_size:
+            found_err = True
+            print('wrong size for volume {}: {} != {}'
+                  .format(vol_idx, actual_size, expected_size))
+    if real_offset0 != expected_offset0:
+        found_err = True
+        print('wrong offset for file0: {} != {}'
+              .format(real_offset0, expected_offset0))
+    if real_offset1 != expected_offset1:
+        found_err = True
+        print('wrong offset for file1: {} != {}'
+              .format(real_offset1, expected_offset1))
+
+    # logical negation; bitwise `~` on a bool gives -1 or -2, both truthy
+    return not found_err
 
 
 def main():
@@ -48,8 +221,15 @@ def main():
 
     see module doc for more info
     """
-    raise NotImplementedError()
+
+    n_errs = 0
+    with TemporaryDirectory() as temp_dir:
+        test_succeeded = test(temp_dir, 3*BLOCKSIZE, 1*BLOCKSIZE)
+        if not test_succeeded:
+            n_errs += 1
+
+    return n_errs
 
 
 if __name__ == '__main__':
-    main()
+    sys.exit(main())