1 # Copyright (C) 2013 Intra2net AG
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU Lesser General Public License as published
5 # by the Free Software Foundation; either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU Lesser General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program. If not, see
15 # <http://www.gnu.org/licenses/lgpl-3.0.html>
19 from tempfile import TemporaryDirectory
21 import deltatar.crypto as crypto
22 from deltatar.tarfile import TarFile, PAX_FORMAT, GNU_FORMAT, BLOCKSIZE
23 from . import BaseTest, new_volume_handler, closing_new_volume_handler
24 from .create_pseudo_random_files import create_file as create_random_file
25 from .test_multivol_compression_sizes import test as multivol_compr_test_func
27 class MultivolGnuFormatTest(BaseTest):
29 Test multivolume support in tarfile. Tar Format is specified at class level.
32 # used as the --format argument to tar command on tar file creation
33 tar_command_format = "gnu"
35 # used as Tarfile.open format option argument for tar file creation
36 tarfile_format = GNU_FORMAT
38 # overhead size used to calculate the exact maximum size of a tar file with
39 # no extra volume that stores only one file. In case of GNU format this is
40 # the size of three blocks:
41 # * 1 block used to store the header information of the stored file
42 # * 2 blocks used to mark the end of the tar file
43 tarfile_overhead = 3*BLOCKSIZE
44 file_overhead = 1*BLOCKSIZE
46 # overhead size used to calculate the exact maximum size of a tar volume,
47 # corresponding with a multivolume tar file storing a single file. In the
48 # case of GNU format this is the same as tarfile_overhead.
49 tarvol_overhead = 3*BLOCKSIZE
51 def test_no_volume(self):
53 Create a tar file with only one file inside and no extra volumes
56 # create the content of the file to compress and hash it
57 hash = self.create_file("big", 50000)
59 # create the tar file without extra volumes (no max_volume_size given)
60 tarobj = TarFile.open("sample.tar", mode="w", format=self.tarfile_format)
64 # check that the tar volumes were correctly created
65 assert os.path.exists("sample.tar")
66 assert not os.path.exists("sample.tar.1")
69 assert not os.path.exists("big")
72 os.system("tar xfM sample.tar </dev/null")
73 assert os.path.exists("big")
74 assert hash == self.md5sum("big")
76 def test_volume_creation1(self):
78 Create a tar file with two volumes, only one file inside
81 # create the content of the file to compress and hash it
82 hash = self.create_file("big", 50000)
84 # create the tar file with volumes
85 tarobj = TarFile.open("sample.tar",
87 format=self.tarfile_format,
88 max_volume_size=30000,
89 new_volume_handler=new_volume_handler)
93 # check that the tar volumes were correctly created
94 assert os.path.exists("sample.tar")
95 assert os.path.exists("sample.tar.1")
96 assert not os.path.exists("sample.tar.2")
99 assert not os.path.exists("big")
101 # extract with normal tar and check output
102 os.system("tar xfM sample.tar --file=sample.tar.1 </dev/null")
103 assert os.path.exists("big")
104 assert hash == self.md5sum("big")
106 def test_volume_creation2(self):
108 Create a tar file with 2 extra volumes, only one file inside
111 # create the content of the file to compress and hash it
112 hash = self.create_file("big", 50000)
114 # create the tar file with volumes
115 tarobj = TarFile.open("sample.tar",
117 format=self.tarfile_format,
118 max_volume_size=20000,
119 new_volume_handler=new_volume_handler)
123 # check that the tar volumes were correctly created
124 assert os.path.exists("sample.tar")
125 assert os.path.exists("sample.tar.1")
126 assert os.path.exists("sample.tar.2")
127 assert not os.path.exists("sample.tar.3")
130 assert not os.path.exists("big")
132 # extract with normal tar and check output
133 os.system("tar xfM sample.tar --file=sample.tar.1 --file=sample.tar.2 </dev/null")
134 assert os.path.exists("big")
135 assert hash == self.md5sum("big")
137 def test_multivol_multifiles(self):
139 Create a tar file with two volumes and three files inside
144 hash["big"] = self.create_file("big", 50000)
145 hash["small"] = self.create_file("small", 100)
146 hash["small2"] = self.create_file("small2", 354)
148 # create the tar file with volumes
149 tarobj = TarFile.open("sample.tar",
151 format=self.tarfile_format,
152 max_volume_size=20000,
153 new_volume_handler=new_volume_handler)
159 # check that the tar volumes were correctly created
160 assert os.path.exists("sample.tar")
161 assert os.path.exists("sample.tar.1")
162 assert os.path.exists("sample.tar.2")
163 assert not os.path.exists("sample.tar.3")
169 # extract with normal tar and check output
170 os.system("tar xfM sample.tar --file=sample.tar.1 --file=sample.tar.2 </dev/null")
171 for key, value in hash.items():
172 assert os.path.exists(key)
173 assert value == self.md5sum(key)
175 def test_get_file_size(self):
177 Test _Stream.get_file_size which is the basis for multivol with
181 # create test files of different sizes
183 n_sizes = 13 # 1,4,16,64,256,1KiB=4**5 ... 1MiB=4**10 ... 1GiB=4**15
187 for exponents in range(n_sizes):
188 sizes.append(next_size)
189 new_name = 'size_test_{:08d}'.format(next_size)
190 file_names.append(new_name)
191 self.create_file(new_name, next_size)
192 next_size *= size_factor
197 for mode, password in [ ('w|gz', None) , ('w|bz2', None)
198 , ('w|xz', None) , ('w#gz' , None)
199 , ('w#gz', "test"), ('w#tar', "test")
201 tar_file_name = "size_test.tar." + mode[2:]
202 for size_number in range(4,n_sizes):
203 for order in 1,-1: # small files first or big files first
206 encryptor = None # could leak due to scoping
208 encryptor = crypto.Encrypt (password=password, version=1,
210 tarobj = TarFile.open(tar_file_name,
212 format=self.tarfile_format,
213 encryption=encryptor)
214 for file_name in file_names[:size_number][::order]:
215 tarobj.add(file_name)
216 estimate = tarobj.fileobj.estim_file_size()
218 estimate_post = tarobj.fileobj.estim_file_size()
219 actual_size = os.stat(tar_file_name).st_size
220 err = abs(actual_size - estimate)
221 #print('mode {:>11s}, {:2} files (up to size {:9}): '
222 # 'estim={:9}, true={:9}, post={:9}, err={:5}'
223 # .format(mode, size_number, sizes[size_number],
224 # estimate, actual_size, estimate_post, err))
225 os.unlink(tar_file_name)
228 err = abs(actual_size - estimate_post)
229 if err > max_err_post:
232 #print('max err is {}, post={}'.format(max_err, max_err_post))
233 assert max_err < 13*1024
234 assert max_err_post == 0
237 for file_name in file_names:
240 def test_multivol_compress_warning(self):
241 """ check warning being issued if compressing multivolume with w: """
242 with self.assertWarns(UserWarning):
243 tarobj = TarFile.open("sample.tar.gz",
245 format=self.tarfile_format,
246 max_volume_size=30000,
247 new_volume_handler=new_volume_handler)
250 def test_compress_single(self):
251 ''' check creation of single volume when compression is on '''
253 # create the content of the file to compress and hash it
254 hash = self.create_file("big", 50000)
256 # create the tar file with volumes and compression
257 tarobj = TarFile.open("sample.tar.gz",
259 format=self.tarfile_format,
260 max_volume_size=30000,
261 new_volume_handler=new_volume_handler,
267 # data fits into a single volume -- check that no second is created
268 assert os.path.exists("sample.tar.gz")
269 assert not os.path.exists("sample.tar.gz.1")
271 # check size of first volume
272 size = os.stat("sample.tar.gz").st_size
273 arbitrary_low_size_bound = 315 # adjust if zlib changes
274 arbitrary_high_size_bound = 410 # adjust if zlib changes
275 assert arbitrary_low_size_bound < size < arbitrary_high_size_bound, \
276 'size of sample.tar.gz is {}'.format(size)
279 assert not os.path.exists("big")
281 # extract with normal tar and check output
284 output = subprocess.check_output(
285 "tar xvf sample.tar.gz".split(),
286 universal_newlines=True)
287 #for line in output.splitlines():
288 # print(line.rstrip())
289 assert os.path.exists("big")
290 assert hash == self.md5sum("big")
294 def test_multivol_compress(self):
295 ''' check creation of multiple volumes when compression is on '''
297 # create a random file that is not so easy to compress...
298 filename = create_random_file('./', 100000)
299 hash = self.md5sum(filename)
301 # need own volume handler so files maintain their suffix for gunzip
302 def my_volume_handler(tarobj, base_name, volume_number):
303 if not base_name.endswith('.tar.gz'):
304 raise ValueError('need .tar.gz file!')
305 tarobj.fileobj.close()
306 new_name = '{}.{}.tar.gz'.format(base_name[:-7], volume_number)
307 tarobj.open_volume(new_name)
309 # create the tar file with volumes and compression
310 tarobj = TarFile.open("sample.tar.gz",
312 format=self.tarfile_format,
313 max_volume_size=30000,
314 new_volume_handler=my_volume_handler,
320 # data fits into 2 volumes -- check that no third is created
321 assert os.path.exists("sample.tar.gz")
322 assert os.path.exists("sample.1.tar.gz")
323 assert not os.path.exists("sample.tar.gz.1")
324 assert not os.path.exists("sample.tar.gz.2")
325 assert not os.path.exists("sample.2.tar.gz.2")
328 assert not os.path.exists(filename)
330 # extract with shell means; slightly complicated because the linux
331 # tar/gunzip cannot do gzipped-multi-volume archives
334 for cmd in 'gunzip -v sample.tar.gz', 'gunzip -v sample.1.tar.gz', \
335 'tar xvfM sample.tar --file=sample.1.tar':
337 output = subprocess.check_output(cmd.split(),
338 universal_newlines=True)
339 #for line in output.splitlines():
340 # print(line.rstrip())
341 assert os.path.exists(filename)
342 assert hash == self.md5sum(filename)
344 os.unlink('sample.tar')
345 os.unlink('sample.1.tar')
348 def test_volume_extract1(self):
350 Create a tar file with multiple volumes and one file and extract it
352 # create the content of the file to compress and hash it
353 hash = self.create_file("big", 5*1024*1024)
355 # create the tar file with volumes
356 tarobj = TarFile.open("sample.tar",
358 format=self.tarfile_format,
359 max_volume_size=3*1024*1024,
360 new_volume_handler=new_volume_handler)
364 # check that the tar volumes were correctly created
365 assert os.path.exists("sample.tar")
366 assert os.path.exists("sample.tar.1")
367 assert not os.path.exists("sample.tar.2")
370 assert not os.path.exists("big")
372 # extract and check output
373 tarobj = TarFile.open("sample.tar",
375 new_volume_handler=new_volume_handler)
378 assert os.path.exists("big")
379 assert hash == self.md5sum("big")
381 def test_volume_extract2(self):
383 Create a multivolume tar file with gnu tar command, extract it with
386 # create the content of the file to compress and hash it
387 hash = self.create_file("big", 5*1024*1024)
389 # create the tar file with volumes
390 os.system("tar cM --format=%s -L 3000 big --file=sample.tar "\
391 "--file=sample.tar.1 </dev/null" % self.tar_command_format)
393 # check that the tar volumes were correctly created
394 assert os.path.exists("sample.tar")
395 assert os.path.exists("sample.tar.1")
396 assert not os.path.exists("sample.tar.2")
399 assert not os.path.exists("big")
401 # extract and check output
402 tarobj = TarFile.open("sample.tar",
404 new_volume_handler=new_volume_handler)
407 assert os.path.exists("big")
408 assert hash == self.md5sum("big")
410 def test_volume_extract3(self):
412 Create a multivolume tar file with gnu tar command with multiple
413 files, extract it with tarfile library
415 # create the content of the file to compress and hash it
417 hash["big"] = self.create_file("big", 5*1024*1024)
418 hash["small"] = self.create_file("small", 100)
419 hash["small2"] = self.create_file("small2", 354)
421 # create the tar file with volumes
422 os.system("tar cM --format=%s -L 3000 big small small2 --file=sample.tar "\
423 "--file=sample.tar.1 </dev/null" % self.tar_command_format)
425 # check that the tar volumes were correctly created
426 assert os.path.exists("sample.tar")
427 assert os.path.exists("sample.tar.1")
428 assert not os.path.exists("sample.tar.2")
430 for key, value in hash.items():
432 assert not os.path.exists(key)
434 # extract and check output
435 tarobj = TarFile.open("sample.tar",
437 new_volume_handler=new_volume_handler)
441 for key, value in hash.items():
442 assert os.path.exists(key)
443 assert value == self.md5sum(key)
445 def test_multivol_multifile_extract(self):
447 create a multivolume tar file with multiple files and extracts it
452 hash["big"] = self.create_file("big", 50000)
453 hash["small"] = self.create_file("small", 100)
454 hash["small2"] = self.create_file("small2", 354)
456 # create the tar file with volumes
457 tarobj = TarFile.open("sample.tar",
459 format=self.tarfile_format,
460 max_volume_size=20000,
461 new_volume_handler=new_volume_handler)
467 # check that the tar volumes were correctly created
468 assert os.path.exists("sample.tar")
469 assert os.path.exists("sample.tar.1")
470 assert os.path.exists("sample.tar.2")
471 assert not os.path.exists("sample.tar.3")
477 # extract and check output
478 tarobj = TarFile.open("sample.tar",
480 new_volume_handler=new_volume_handler)
484 for key, value in hash.items():
485 assert os.path.exists(key)
486 assert value == self.md5sum(key)
488 def test_multiple_files_extract(self):
490 creates a simple tar file with no volumes and with multiple files
491 inside and extracts it
496 hash["big"] = self.create_file("big", 50000)
497 hash["small"] = self.create_file("small", 100)
498 hash["small2"] = self.create_file("small2", 354)
500 # create the tar file (no extra volumes are expected)
501 tarobj = TarFile.open("sample.tar",
502 format=self.tarfile_format,
509 # check that the tar volumes were correctly created
510 assert os.path.exists("sample.tar")
511 assert not os.path.exists("sample.tar.1")
517 # extract and check output
518 tarobj = TarFile.open("sample.tar",
520 new_volume_handler=new_volume_handler)
524 for key, value in hash.items():
525 assert os.path.exists(key)
526 assert value == self.md5sum(key)
528 def test_corner_case_split_size1(self):
530 Creates a tar file with a single file inside that contains the maximum
531 size allowed in one volume.
533 hash = self.create_file("big", 5*1024*1024)
535 # create the tar file with volumes
536 tarobj = TarFile.open("sample.tar",
538 format=self.tarfile_format,
539 # see tarfile_overhead description for details
540 max_volume_size=5*1024*1024 + self.tarfile_overhead,
541 new_volume_handler=new_volume_handler)
545 # check that the tar volumes were correctly created
546 assert os.path.exists("sample.tar")
547 assert not os.path.exists("sample.tar.1")
550 assert not os.path.exists("big")
552 # extract and check output
553 tarobj = TarFile.open("sample.tar",
555 new_volume_handler=new_volume_handler)
558 assert os.path.exists("big")
559 assert hash == self.md5sum("big")
561 def test_corner_case_split_size2(self):
563 Creates a tar file with a single 4 MiB file inside and a volume size of
564 2 MiB plus overhead, so the file is expected to fill exactly two volumes.
566 hash = self.create_file("big", 4*1024*1024)
568 # create the tar file with volumes
569 tarobj = TarFile.open("sample.tar",
571 format=self.tarfile_format,
572 # see tarvol_overhead description for details
573 max_volume_size=2*1024*1024 + self.tarvol_overhead,
574 new_volume_handler=new_volume_handler)
578 # check that the tar volumes were correctly created
579 assert os.path.exists("sample.tar")
580 assert os.path.exists("sample.tar.1")
581 assert not os.path.exists("sample.tar.2")
584 assert not os.path.exists("big")
586 # extract and check output
587 tarobj = TarFile.open("sample.tar",
589 new_volume_handler=new_volume_handler)
592 assert os.path.exists("big")
593 assert hash == self.md5sum("big")
595 def test_corner_case_split_size3(self):
597 Creates a tar file with a single file inside that contains the maximum
598 size allowed in one volume but without the overhead.
600 hash = self.create_file("big", 4*1024*1024)
602 # create the tar file with volumes
603 tarobj = TarFile.open("sample.tar",
605 format=self.tarfile_format,
606 max_volume_size=2*1024*1024,
607 new_volume_handler=new_volume_handler)
611 # check that the tar volumes were correctly created
612 assert os.path.exists("sample.tar")
613 assert os.path.exists("sample.tar.1")
614 assert os.path.exists("sample.tar.2")
615 assert not os.path.exists("sample.tar.3")
618 assert not os.path.exists("big")
620 # extract and check output
621 tarobj = TarFile.open("sample.tar",
623 new_volume_handler=new_volume_handler)
626 assert os.path.exists("big")
627 assert hash == self.md5sum("big")
629 def test_corner_case_split_size4(self):
631 Creates a tar file with multiple files inside that contains the maximum
632 size allowed in one volume.
635 hash['big'] = self.create_file("big", 3*1024*1024)
636 hash['small'] = self.create_file("small", 1*1024*1024)
638 # create the tar file with volumes
639 tarobj = TarFile.open("sample.tar",
641 format=self.tarfile_format,
642 max_volume_size=(4*1024*1024 +
643 self.tarfile_overhead +
645 new_volume_handler=new_volume_handler)
650 # check that the tar volumes were correctly created
651 assert os.path.exists("sample.tar")
652 assert not os.path.exists("sample.tar.1")
654 for key, value in hash.items():
656 assert not os.path.exists(key)
658 # extract and check output
659 tarobj = TarFile.open("sample.tar",
661 new_volume_handler=new_volume_handler)
665 for key, value in hash.items():
666 assert os.path.exists(key)
667 assert value == self.md5sum(key)
669 def test_corner_case_split_size5(self):
671 Creates a tar file with multiple files inside that contains the maximum
672 size allowed in one volume.
675 hash['big'] = self.create_file("big", 3*1024*1024)
676 hash['small'] = self.create_file("small", 1*1024*1024)
678 # create the tar file with volumes
679 tarobj = TarFile.open("sample.tar",
681 format=self.tarfile_format,
682 max_volume_size=(2*1024*1024 +
683 self.tarfile_overhead +
685 new_volume_handler=new_volume_handler)
690 # check that the tar volumes were correctly created
691 assert os.path.exists("sample.tar")
692 assert os.path.exists("sample.tar.1")
693 assert not os.path.exists("sample.tar.2")
695 for key, value in hash.items():
697 assert not os.path.exists(key)
699 # extract and check output
700 tarobj = TarFile.open("sample.tar",
702 new_volume_handler=new_volume_handler)
706 for key, value in hash.items():
707 assert os.path.exists(key)
708 assert value == self.md5sum(key)
710 def test_volume_not_found(self):
712 Create a tar file with multiple volumes and one file and extract it, but
713 one of the volumes is missing
715 # create the content of the file to compress and hash it
716 hash = self.create_file("big", 5*1024*1024)
718 # create the tar file with volumes
719 tarobj = TarFile.open("sample.tar",
721 format=self.tarfile_format,
722 max_volume_size=2*1024*1024,
723 new_volume_handler=closing_new_volume_handler)
727 # check that the tar volumes were correctly created
728 assert os.path.exists("sample.tar")
729 assert os.path.exists("sample.tar.1")
730 assert os.path.exists("sample.tar.2")
731 assert not os.path.exists("sample.tar.3")
734 os.unlink("sample.tar.2")
736 class VolumeNotFound(Exception):
739 def new_volume_handler2(tarobj, base_name, volume_number):
741 Handles the new volumes
743 volume_path = "%s.%d" % (base_name, volume_number)
746 tarobj.fileobj.close()
747 tarobj.open_volume(volume_path)
749 # only volume number 2 is missing
750 assert volume_number == 2
753 # extract and check output
754 tarobj = TarFile.open("sample.tar",
756 new_volume_handler=new_volume_handler2)
762 assert os.path.exists("big")
763 assert hash != self.md5sum("big")
765 def test_multivol_compress_vol_size(self):
766 """ test size of compressed volumes using "external" test routine
768 created an extensive test of this in extra file, run here just 2 short
773 input_size_factor = 3 # --> add 3*3 MiB of data
774 modes = ('w#gz', None), ('w#gz', "test1234")
775 debug_level = 0 # no debug output
776 clean_up_if_error = True # leave no files behind
778 with TemporaryDirectory(prefix='deltatar_test_multivol_') \
779 as temp_dir: # is deleted automatically after test
780 for mode, password in modes:
781 multivol_compr_test_func(vol_size, input_size_factor, mode,
784 debug_level=debug_level,
785 clean_up_if_error=clean_up_if_error)
788 class MultivolPaxFormatTest(MultivolGnuFormatTest):
790 Test multivolume support in tarfile with PAX format
793 tar_command_format = "pax"
795 tarfile_format = PAX_FORMAT
797 # overhead size used to calculate the exact maximum size of a tar file with
798 # no extra volume that stores only one file. In case of PAX format this is
799 # the size of five blocks:
800 # * 1 block used to store the header information of the stored file
801 # * 1 block used to store the header information of the pax header
802 # * 1 block used to store the pax header
803 # * 2 blocks used to mark the end of the tar file
804 tarfile_overhead = 5*BLOCKSIZE
805 file_overhead = 3*BLOCKSIZE
807 # overhead size used to calculate the exact maximum size of a tar volume,
808 # corresponding with a multivolume tar file storing a single file. In the
809 # case of Pax format, it's the same as tarfile_overhead plus a block for
811 tarvol_overhead = 6*BLOCKSIZE