3 """ Test size of volumes when using multiple volumes and compression is on
5 Uses random files from disc as input. That is not very time-efficient but
6 provides a realistic setting for the nature of input data (file sizes,
7 randomness of data, ...)
9 Not a unittest, will probably take too long
12 from tempfile import mkstemp, mkdtemp
13 from shutil import rmtree
16 from os.path import isdir, dirname, abspath, join as pjoin
17 from glob import iglob
20 from traceback import print_exc
22 if __name__ == '__main__':
23 # ensure we are importing the "right" deltatar
# When run as a script, the parent of the directory containing this file is
# put at the front of sys.path, so it is searched before every other entry
# when the deltatar imports below are resolved.
24 parent_dir = dirname(dirname(abspath(__file__)))
25 sys.path.insert(0, parent_dir)
26 print('pre-prended {} to sys path'.format(parent_dir))
28 from deltatar.tarfile import TarFile, BLOCKSIZE
29 import deltatar.crypto as crypto
32 #: tolerances of volume sizes
# test() picks one of these as its size_tolerance and flags a volume whose
# size on disk (st_size, in bytes) differs from the requested volume size by
# more than that amount. Presumably the choice follows the compression mode
# named in each constant -- confirm against test().
35 SIZE_TOLERANCE_GZ = 32*KiB # Gzip compression object buffer
36 SIZE_TOLERANCE_BZ2 = MiB
37 SIZE_TOLERANCE_XZ = 72*KiB
38 SIZE_TOLERANCE_NONE = 3*BLOCKSIZE # should be little
40 #: variables for find_random_files
# _get_random_file may return a directory itself only when its recursion level
# is greater than DIR_RETURN_MIN_REC and random.random() < DIR_RETURN_PROB.
41 DIR_RETURN_MIN_REC = 5
42 DIR_RETURN_PROB = 0.0 # disabled: random.random() < 0.0 is never true
46 #: subdirs of START_DIR that might contain volatile or network-mounted data
# find_random_files removes these from its list of starting points.
47 EXCLUDE_DIRS = 'var', 'proc', 'dev', 'tmp', 'media', 'mnt', 'sys'
# stat-mode predicates; find_random_files applies each one to a candidate's
# lstat st_mode and checks whether any of them matches.
49 OK_MODES = stat.S_ISREG, stat.S_ISDIR, stat.S_ISFIFO, stat.S_ISLNK, \
50 stat.S_ISCHR, stat.S_ISBLK
53 def _get_random_file(dir_name, rec_level):
54 """ recursive helper for find_random_files """
56 if rec_level > DIR_MAX_REC:
59 #print('_get_random_file in {}, level {}'.format(dir_name, rec_level))
61 contents = os.listdir(dir_name)
62 except PermissionError:
67 entry = pjoin(dir_name, random.choice(contents))
70 if rec_level > DIR_RETURN_MIN_REC and \
71 random.random() < DIR_RETURN_PROB:
72 return entry # with a small probability return a dir
74 return _get_random_file(entry, rec_level + 1)
79 def find_random_files(min_file_size=100):
80 """ generator over random file names
82 Checks if files are readable by user (os.access) and excludes dirs with
83 most volatile files and mounts; will still yield links or -- with a small
84 probability -- names of dirs with many parents (no '/usr' but maybe
85 /usr/local/lib/python/site-packages/deltatar)
87 :param int min_file_size: size (in bytes) that returned files have to have
91 # prepare list of dirs in START_DIR that are not EXCLUDE_DIRS
92 start_contents = [pjoin(START_DIR, dirn) for dirn in os.listdir(START_DIR)]
93 for excl in EXCLUDE_DIRS:
95 start_contents.remove(pjoin(START_DIR, excl))
101 #print('_get_random_file in {}, level {}'.format(START_DIR, 0))
102 entry = random.choice(start_contents)
104 next_result = _get_random_file(entry, 1)
107 #print('found non-dir in START_DIR: {}'.format(next_result))
108 if next_result is None:
109 #print('received None, try next')
111 if not os.access(next_result, os.R_OK):
112 #print('cannot access {}, try next'.format(next_result))
114 statres = os.lstat(next_result)
115 if statres.st_size < min_file_size:
116 #print('file {} too small'.format(next_result))
118 mode = statres.st_mode
119 if not any(mode_test(mode) for mode_test in OK_MODES):
120 #print('mode not accepted for {}, try next'.format(next_result))
125 def new_volume_handler(tarobj, base_name, volume_number,
126 prefix='', debug_level=0):
127 """ called when creating a new volume from TarFile.addfile """
130 print(prefix + 'new volume handler called with {} and new vol {}'
131 .format(base_name, volume_number))
133 # close current volume file object
134 tarobj.fileobj.close()
136 # create name for next volume file
137 idx = base_name.rindex('.0.')
138 new_vol_path = '{}.{}.{}'.format(base_name[:idx], volume_number,
141 tarobj.open_volume(new_vol_path)
144 def test(volume_size, input_size_factor, mode, password, temp_dir, prefix='',
145 clean_up_if_error=False, debug_level=0):
146 """ create TarFile with given vol_size, add vol_size*input_size
148 :param volume_size: in MiB
149 :param str prefix: optional output prefix
150 :param str mode: compression mode for TarFile's mode argument
151 :param bool clean_up_if_error: True will ensure there are no files left;
152 False (default): leave volumes if error
153 :param int debug_level: 0-3 where 0=no debug output, 3=lots of debug output
154 (forwarded to TarFile constructor)
155 :returns: True if test failed (some size wrong, file missing, ...)
158 input_size = volume_size * input_size_factor * MiB
159 something_strange = False
163 size_tolerance = SIZE_TOLERANCE_GZ
166 size_tolerance = SIZE_TOLERANCE_BZ2
169 size_tolerance = SIZE_TOLERANCE_XZ
172 size_tolerance = SIZE_TOLERANCE_NONE
180 file_handle, temp_name = mkstemp(dir=temp_dir, suffix='.0.' + suffix)
181 os.close(file_handle)
185 base_name = temp_name.replace('.0.' + suffix, '')
187 print(prefix + 'tarfile: ' + temp_name)
189 volume_prefix = prefix + 'vol={}MiB, in=*{}, mode={}: ' \
190 .format(volume_size, input_size_factor, mode)
191 def vol_handler(a,b,c):
192 return new_volume_handler(a,b,c, volume_prefix, debug_level)
197 if password is not None:
198 encryptor = crypto.Encrypt (1, 1, password=password)
200 tarobj = TarFile.open(temp_name, mode=mode,
201 max_volume_size=volume_size*MiB,
202 new_volume_handler=vol_handler,
203 encryption=encryptor, debug=debug_level)
209 for count, file_name in enumerate(find_random_files()):
210 if file_name.startswith(base_name):
211 continue # do not accidentally add self
212 new_size = os.lstat(file_name).st_size
213 if new_size > max(volume_size*MiB, input_size-added_size):
214 continue # add at most one volume_size too much
215 new_name = '{}_{:04d}_{}_{:09d}' \
216 .format(base_name, count,
217 file_name.replace('/','_')[:200],
219 tarobj.add(file_name, arcname=new_name)
220 files_added.append(new_name)
221 added_size += new_size
223 print('{}vol={}MiB, in=*{}, mode={}: added {:.1f}MiB/{:.1f}MiB'
224 .format(prefix, volume_size, input_size_factor, mode,
225 added_size/MiB, input_size/MiB))
226 if added_size > input_size:
234 for file_name in iglob(pjoin(temp_dir, base_name + '*')):
236 vol_size = os.lstat(file_name).st_size
237 volume_size_sum += vol_size
239 print('{} - {}: {:.3f}'.format(prefix, file_name,
241 if abs(vol_size - volume_size*MiB) > size_tolerance:
245 print(prefix + 'compression ratio (input/compressed size): {:.2f}'
246 .format(added_size/volume_size_sum))
249 print(prefix + 'wrong size!')
250 something_strange = True
252 print(prefix + 'no volumes!')
253 something_strange = True
257 print(prefix + 'extracting:')
259 if password is not None:
260 decryptor = crypto.Decrypt (password=password)
261 tarobj = TarFile.open(temp_name, mode=mode.replace('w', 'r'),
262 new_volume_handler=new_volume_handler,
263 encryption=decryptor, debug=debug_level)
264 tarobj.extractall(path='/')
267 # check whether all original files are accounted for
269 files_found = [False for _ in files_added]
271 for file_name in iglob(pjoin(temp_dir, base_name + '_*')):
273 orig_size = int(file_name[-9:])
274 if os.lstat(file_name).st_size != orig_size:
275 print(prefix + 'wrong size: {} instead of {} for {}!'
276 .format(os.lstat(file_name).st_size, orig_size,
278 something_strange = True
280 idx = files_added.index(file_name)
282 print(prefix + 'extracted file that was not added: '
284 something_strange = True
286 files_found[idx] = True
288 not_found = [file_name
289 for file_name, found in zip(files_added, files_found)
292 for file_name in not_found:
293 print(prefix + 'original file not found: ' + file_name)
294 something_strange = True
296 if n_files_found != len(files_added):
297 print(prefix + 'added {} files but extracted {}!'
298 .format(len(files_added), n_files_found))
299 something_strange = True
300 except Exception as exc:
301 print('caught exception {}'.format(exc))
303 something_strange = True
306 os.close(file_handle)
310 for file_name in iglob(base_name + '*'):
311 if clean_up_if_error:
313 elif something_strange and file_name.endswith('.' + suffix):
316 os.unlink(file_name) # remove
317 if debug_level and something_strange and not clean_up_if_error:
318 print(prefix + 'leaving volume files ' + base_name
322 if something_strange:
323 print('{}test with volume_size={}, input_factor={}, mode={} failed!'
324 .format(prefix, volume_size, input_size_factor, mode))
326 print(prefix + 'test succeeded')
328 return something_strange
331 def test_lots(fast_fail=False, debug_level=0, clean_up_if_error=False):
332 """ Tests a lot of combinations of volume_size, input_size and mode
334 :param bool fast_fail: set to True to stop after first error
335 :returns: number of failed tests
338 # volume sizes in MiB
339 volume_sizes = 10, 100
341 # input size factor (multiplied with volume size)
342 input_size_factors = 3, 10, 30
344 # compression modes (including uncompressed as comparison)
345 modes = ('w|gz' , None) \
349 , ('w#gz' , "test1234") \
350 , ('w#' , "test1234")
352 # create a temp dir for all input and output data
353 temp_dir = mkdtemp(prefix='deltatar_cmprs_tst_')
355 n_tests = len(volume_sizes) * len(input_size_factors) * len(modes)
358 for volume_size in volume_sizes:
361 for input_size_factor in input_size_factors:
364 for mode, password in modes:
366 prefix = 'test{:d}: '.format(test_idx)
367 something_strange = test(volume_size, input_size_factor, mode,
370 clean_up_if_error=False,
371 debug_level=debug_level)
372 if something_strange:
377 print('after running test {:3d}/{} have {} errs'
378 .format(test_idx, n_tests, n_errs))
380 print('removing temp dir {}'.format(temp_dir))
383 print('leaving temp dir {}'.format(temp_dir))
388 if __name__ == '__main__':
392 # forward number of errors to shell