6 from functools import partial
8 import deltatar.deltatar as deltatar
9 import deltatar.crypto as crypto
10 import deltatar.tarfile as tarfile
12 from . import BaseTest
# password assigned to the PASSWORD attribute of the encrypted test classes
TEST_PASSWORD = "test1234"
# divisor applied when computing the size of the dummy files that are meant
# to fill a volume
VOLUME_OVERHEAD = 1.4 # account for tar overhead when fitting files into
                      # volumes; this is black magic
21 ###############################################################################
23 ###############################################################################
def flip_bits (fname, off, b=0x01, n=1):
    """
    Open file *fname* at offset *off*, replacing the next *n* bytes with
    their values xor'ed with *b*.

    :param fname: path of the file to modify in place
    :param off:   offset of the first byte to modify
    :param b:     mask that is xor'ed onto every byte (0..255)
    :param n:     number of bytes to modify
    :raises OSError: if the file cannot be opened, read or written

    If fewer than *n* bytes are available at *off*, only the bytes that
    could be read are modified; the file never grows.
    """
    fd = os.open (fname, os.O_RDWR)

    try:
        pos = os.lseek (fd, off, os.SEEK_SET)
        assert pos == off
        chunk = os.read (fd, n)
        chunk = bytes (v ^ b for v in chunk)
        # reading advanced the file position; rewind so the modified bytes
        # overwrite the ones just read
        pos = os.lseek (fd, off, os.SEEK_SET)
        assert pos == off
        os.write (fd, chunk)
    finally:
        # release the descriptor even if seeking or I/O failed
        os.close (fd)
def gz_header_size (fname, off=0):
    """
    Determine the length of the gzip header starting at *off* in file fname.

    The header is variable length because it may contain the filename as NUL
    terminated bytes. The file is scanned byte by byte, beginning right after
    the fixed part of ``tarfile.GZ_HEADER_SIZE`` bytes, until the first NUL
    byte is found.

    :param fname: path of the file to inspect
    :param off:   offset in *fname* at which the gzip header begins
    :returns:     distance in bytes from *off* to the terminating NUL byte
    :raises ValueError: if the file ends before a NUL byte was found
    :raises OSError:    if the file cannot be opened or read

    NOTE(review): the value returned is assumed to be the position of the NUL
    byte relative to the header start -- confirm against the callers.
    """
    # the fixed part of the header is followed by a field of unknown
    # length so we need to determine where the actual payload starts
    pos = off + tarfile.GZ_HEADER_SIZE
    fd = os.open (fname, os.O_RDONLY)

    try:
        assert os.lseek (fd, pos, os.SEEK_SET) == pos
        while True:
            byte = os.read (fd, 1)
            if not byte:
                # end of file: report instead of failing on an empty read
                raise ValueError ("%s: no NUL terminator after gzip header "
                                  "at offset %d" % (fname, off))
            if byte [0] == 0:
                break
            pos += 1
    finally:
        os.close (fd)

    return pos - off
def is_pdt_encrypted (fname):
    """
    Returns true if the file contains at least one PDT header plus enough
    space for the object.

    :param fname: path of the file to check
    :returns:     True if a header could be read through
                  ``crypto.hdr_read_stream`` and the file holds at least as
                  many bytes after it as the header announces; False
                  otherwise, including on any error while opening or
                  parsing the file
    """
    try:
        with open (fname, "rb") as st:
            hdr = crypto.hdr_read_stream (st)
            # NOTE(review): assumes the parsed header exposes the size of the
            # object under the key "ctsize" -- confirm against crypto
            siz = hdr ["ctsize"]
            # explicit comparison instead of an assert so the check is not
            # compiled away when running with ``python -O``
            return len (st.read (siz)) == siz
    except Exception:
        # deliberately broad: any failure means "not a PDT encrypted file"
        return False
83 ###############################################################################
84 ## corruption simulators ##
85 ###############################################################################
class UndefinedTest (Exception):
    """
    No test available for the asked combination of parameters.

    Raised by a corruption simulator when it does not implement the requested
    combination of compression and encryption.
    """
def corrupt_header (_, fname, compress, encrypt):
    """
    Modify a significant byte in the object header of the format.

    :param _:        unused; receives the test instance when the function is
                     invoked through the ``CORRUPT`` class attribute
    :param fname:    path of the file to damage in place
    :param compress: True if the file is compressed
    :param encrypt:  True if the file is encrypted; takes precedence over
                     *compress*
    """
    if encrypt is True: # damage GCM tag
        flip_bits (fname, crypto.HDR_OFF_TAG + 1)
    elif compress is True: # invalidate magic
        # NOTE(review): the offset is assumed; the gzip magic number occupies
        # the first two bytes so offset 1 damages its second byte
        flip_bits (fname, 1)
    else: # Fudge checksum. From tar(5):
        #
        #       struct header_gnu_tar {
        #               char name[100];
        #               char mode[8];
        #               char uid[8];
        #               char gid[8];
        #               char size[12];
        #               char mtime[12];
        #               char checksum[8];
        #               ...
        #
        # the sum below is the offset of the second byte of the checksum
        flip_bits (fname, 100 + 8 + 8 + 8 + 12 + 12 + 1)
def corrupt_ctsize (_, fname, compress, encrypt):
    """
    Blow up the size of an object so as to cause its apparent payload to leak
    into the data that follows it.

    :param _:        unused; receives the test instance when the function is
                     invoked through the ``CORRUPT`` class attribute
    :param fname:    path of the file to damage in place
    :param compress: only used for the error message
    :param encrypt:  must be True; the size field that is damaged is part of
                     the encryption header
    :raises UndefinedTest: if *encrypt* is not True
    """
    if encrypt is True:
        # damage lowest bit of second least significant byte of size field;
        # this effectively sets the ciphertext size to 422, causing it to
        # extend over the next object into the third one.
        return flip_bits (fname, crypto.HDR_OFF_CTSIZE + 1, b=0x01)
    raise UndefinedTest ("corrupt_ctsize %s %s %s" % (fname, compress, encrypt))
def corrupt_entire_header (_, fname, compress, encrypt):
    """
    Flip all bits in the first object header.

    The number of bytes inverted from the start of the file depends on the
    format: the size of the encryption header, the size of the gzip header
    as determined by ``gz_header_size``, or one tar block.

    :param _:        unused; receives the test instance when the function is
                     invoked through the ``CORRUPT`` class attribute
    :param fname:    path of the file to damage in place
    :param compress: True if the file is compressed
    :param encrypt:  True if the file is encrypted; takes precedence over
                     *compress*
    """
    if encrypt is True:
        flip_bits (fname, 0, 0xff, crypto.PDTCRYPT_HDR_SIZE)
    elif compress is True: # invalidate magic
        flip_bits (fname, 0, 0xff, gz_header_size (fname))
    else:
        flip_bits (fname, 0, 0xff, tarfile.BLOCKSIZE)
def corrupt_payload_start (_, fname, compress, encrypt):
    """
    Modify a byte right behind the object header structure of the format.

    The lowest bit of the byte located one past the header size of the
    respective format is flipped.

    :param _:        unused; receives the test instance when the function is
                     invoked through the ``CORRUPT`` class attribute
    :param fname:    path of the file to damage in place
    :param compress: True if the file is compressed
    :param encrypt:  True if the file is encrypted; takes precedence over
                     *compress*
    """
    if encrypt is True:
        flip_bits (fname, crypto.PDTCRYPT_HDR_SIZE + 1)
    elif compress is True:
        flip_bits (fname, gz_header_size (fname) + 1)
    else:
        flip_bits (fname, tarfile.BLOCKSIZE + 1)
def corrupt_leading_garbage (_, fname, compress, encrypt):
    """
    Prepend junk to file.

    A new, unnamed file is created in the directory of *fname*, filled with
    42 random bytes followed by the complete content of *fname*, and then
    linked into the place of the old file.

    :param _:        unused; receives the test instance when the function is
                     invoked through the ``CORRUPT`` class attribute
    :param fname:    path of the file to damage
    :param compress: unused
    :param encrypt:  unused
    :raises OSError: if any of the file operations fails
    """
    aname = os.path.abspath (fname)
    junk  = os.urandom (42)
    outfd = os.open (os.path.dirname (aname), os.O_WRONLY | os.O_TMPFILE,
                     stat.S_IRUSR | stat.S_IWUSR)

    try:
        infd = os.open (fname, os.O_RDONLY)
        try:
            size = os.lseek (infd, 0, os.SEEK_END)
            assert os.lseek (infd, 0, os.SEEK_SET) == 0

            # write new file with garbage prepended
            os.write (outfd, junk) # junk first
            copied = 0
            while copied < size:
                data = os.read (infd, TEST_BLOCKSIZE)
                if not data: # file shrank while copying
                    break
                os.write (outfd, data)
                copied += len (data)

            done = len (junk) + copied
            assert os.lseek (outfd, 0, os.SEEK_CUR) == done
        finally:
            # close old file
            os.close (infd)

        # free the old name ...
        os.unlink (fname)

        # ... and install the new file in its place; the unnamed file is
        # reachable only through its descriptor
        path = "/proc/self/fd/%d" % outfd
        os.link (path, aname, src_dir_fd=0, follow_symlinks=True)
    finally:
        os.close (outfd)
def corrupt_trailing_data (_, fname, compress, encrypt):
    """
    Append junk to file.

    Adds 42 random bytes to the end of *fname*; the existing content is left
    as it is.

    :param _:        unused; receives the test instance when the function is
                     invoked through the ``CORRUPT`` class attribute
    :param fname:    path of the file to damage in place
    :param compress: unused
    :param encrypt:  unused
    :raises OSError: if the file cannot be opened or written
    """
    junk = os.urandom (42)
    fd = os.open (fname, os.O_WRONLY | os.O_APPEND)

    try:
        os.write (fd, junk)
    finally:
        os.close (fd)
def corrupt_volume (_, fname, compress, encrypt):
    """
    Zero out an entire volume.

    Overwrites the whole content of *fname* with NUL bytes, in chunks of at
    most ``TEST_BLOCKSIZE`` bytes; the size of the file does not change.

    :param _:        unused; receives the test instance when the function is
                     invoked through the ``CORRUPT`` class attribute
    :param fname:    path of the file to damage in place
    :param compress: unused
    :param encrypt:  unused
    :raises OSError: if the file cannot be opened or written
    """
    fd = os.open (fname, os.O_WRONLY)

    try:
        size = os.lseek (fd, 0, os.SEEK_END)
        assert os.lseek (fd, 0, os.SEEK_SET) == 0
        zeros = b'\x00' * TEST_BLOCKSIZE
        while size > 0:
            # the last chunk may be shorter than a full block
            todo = min (size, TEST_BLOCKSIZE)
            os.write (fd, zeros [:todo])
            size -= todo
    finally:
        os.close (fd)
def corrupt_hole (_, fname, compress, encrypt):
    """
    Cut file in three pieces, reassemble without the middle one.

    The file is read in chunks of ``TEST_BLOCKSIZE`` bytes. A chunk is copied
    to the new file only if its start offset lies before the first third or
    behind the second third of the file. The result is linked into the place
    of the old file.

    :param _:        unused; receives the test instance when the function is
                     invoked through the ``CORRUPT`` class attribute
    :param fname:    path of the file to damage; must be larger than three
                     times ``TEST_BLOCKSIZE``
    :param compress: unused
    :param encrypt:  unused
    :raises OSError: if any of the file operations fails
    """
    aname = os.path.abspath (fname)
    outfd = os.open (os.path.dirname (aname), os.O_WRONLY | os.O_TMPFILE,
                     stat.S_IRUSR | stat.S_IWUSR)

    try:
        infd = os.open (fname, os.O_RDONLY)
        try:
            size = os.lseek (infd, 0, os.SEEK_END)
            assert os.lseek (infd, 0, os.SEEK_SET) == 0
            assert size > 3 * TEST_BLOCKSIZE
            # true division on purpose: the bounds are only ever compared
            # against offsets, and rounding them would move the hole
            hole = (size / 3, size * 2 / 3)

            done = 0
            while done < size:
                data = os.read (infd, TEST_BLOCKSIZE)
                if not data: # file shrank while copying
                    break
                if done < hole [0] or hole [1] < done:
                    # only copy from outside hole
                    os.write (outfd, data)
                done += len (data)
        finally:
            os.close (infd)

        os.unlink (fname)

        # the unnamed file is reachable only through its descriptor
        path = "/proc/self/fd/%d" % outfd
        os.link (path, aname, src_dir_fd=0, follow_symlinks=True)
    finally:
        os.close (outfd)
def immaculate (_, _fname, _compress, _encrypt):
    """
    Stand-in with the signature of the corruption simulators that does not
    touch the file at all.

    NOTE(review): reconstructed as a no-op since none of the parameters is
    meant to be used -- confirm.

    :returns: None
    """
    return None
241 ###############################################################################
243 ###############################################################################
245 class DefectiveTest (BaseTest):
247 Disaster recovery: restore corrupt backups.
252 FAILURES = 0 # files that could not be restored
253 MISMATCHES = 0 # files that were restored but corrupted
254 CORRUPT = corrupt_payload_start
256 MISSING = None # normally the number of failures
261 Create base test data
263 self.pwd = os.getcwd()
264 self.dst_path = "source_dir"
265 self.src_path = "%s2" % self.dst_path
268 os.system('rm -rf target_dir source_dir* backup_dir* huge')
269 os.makedirs (self.src_path)
273 self.hash [f] = self.create_file ("%s/%s"
274 % (self.src_path, f), 5 + i)
        Remove temporary files created by unit tests and reset globals.
282 os.system("rm -rf source_dir source_dir2 backup_dir*")
286 def default_volume_name (backup_file, _x, _y, n, *a, **kwa):
287 return backup_file % n
289 def gen_file_names (self, comp, pw):
290 bak_path = "backup_dir"
291 backup_file = "the_full_backup_%0.2d.tar"
292 backup_full = ("%s/%s" % (bak_path, backup_file)) % 0
293 index_file = "the_full_index"
295 if self.COMPRESSION is not None:
300 if self.PASSWORD is not None:
301 backup_file = "%s.%s" % (backup_file, deltatar.PDTCRYPT_EXTENSION)
302 backup_full = "%s.%s" % (backup_full, deltatar.PDTCRYPT_EXTENSION)
303 index_file = "%s.%s" % (index_file , deltatar.PDTCRYPT_EXTENSION)
305 return bak_path, backup_file, backup_full, index_file
308 class RecoverTest (DefectiveTest):
310 Recover: restore corrupt backups from index file information.
313 def test_recover_corrupt (self):
315 Perform various damaging actions that cause unreadable objects.
317 Expects the extraction to fail in normal mode. With disaster recovery,
318 extraction must succeed, and exactly one file must be missing.
320 mode = self.COMPRESSION or "#"
321 bak_path, backup_file, backup_full, index_file = \
322 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
325 # add n files for one nth the volume size each, corrected
326 # for metadata and tar block overhead
327 fsiz = int ( ( TEST_VOLSIZ
328 / (TEST_FILESPERVOL * VOLUME_OVERHEAD))
330 fcnt = (self.VOLUMES - 1) * TEST_FILESPERVOL
331 for i in range (fcnt):
332 nvol, invol = divmod(i, TEST_FILESPERVOL)
333 f = "dummy_vol_%d_n_%0.2d" % (nvol, invol)
334 self.hash [f] = self.create_file ("%s/%s"
335 % (self.src_path, f),
339 vname = partial (self.default_volume_name, backup_file)
340 dtar = deltatar.DeltaTar (mode=mode,
342 password=self.PASSWORD,
343 index_name_func=lambda _: index_file,
344 volume_name_func=vname)
346 dtar.create_full_backup \
347 (source_path=self.src_path, backup_path=bak_path,
350 if self.PASSWORD is not None:
351 # ensure all files are at least superficially in PDT format
352 for f in os.listdir (bak_path):
353 assert is_pdt_encrypted ("%s/%s" % (bak_path, f))
355 # first restore must succeed
356 dtar.restore_backup(target_path=self.dst_path,
357 backup_indexes_paths=[
358 "%s/%s" % (bak_path, index_file)
360 for key, value in self.hash.items ():
361 f = "%s/%s" % (self.dst_path, key)
362 assert os.path.exists (f)
363 assert value == self.md5sum (f)
364 shutil.rmtree (self.dst_path)
365 shutil.rmtree (self.src_path)
367 self.CORRUPT (backup_full,
368 self.COMPRESSION is not None,
369 self.PASSWORD is not None)
371 # normal restore must fail
373 dtar.restore_backup(target_path=self.dst_path,
374 backup_tar_path=backup_full)
375 except tarfile.CompressionError:
376 if self.PASSWORD is not None or self.COMPRESSION is not None:
380 except tarfile.ReadError:
381 # can happen with all three modes
383 except tarfile.DecryptionError:
384 if self.PASSWORD is not None:
389 os.chdir (self.pwd) # not restored due to the error above
390 # but recover will succeed
391 failed = dtar.recover_backup(target_path=self.dst_path,
392 backup_indexes_paths=[
393 "%s/%s" % (bak_path, index_file)
396 assert len (failed) == self.FAILURES
398 # with one file missing
401 for key, value in self.hash.items ():
402 kkey = "%s/%s" % (self.dst_path, key)
403 if os.path.exists (kkey):
404 if value != self.md5sum (kkey):
405 mismatch.append (key)
409 # usually, an object whose extraction fails will not be found on
410 # disk afterwards so the number of failures equals that of missing
411 # files. however, some modes will create partial files for objects
412 # spanning multiple volumes that contain the parts whose checksums
414 assert len (missing) == (self.MISSING if self.MISSING is not None
416 assert len (mismatch) == self.MISMATCHES
418 shutil.rmtree (self.dst_path)
421 class RescueTest (DefectiveTest):
423 Rescue: restore corrupt backups from backup set that is damaged to a degree
424 that the index file is worthless.
427 def test_rescue_corrupt (self):
429 Perform various damaging actions that cause unreadable objects, then
430 attempt to extract objects regardless.
432 mode = self.COMPRESSION or "#"
433 bak_path, backup_file, backup_full, index_file = \
434 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
437 # add n files for one nth the volume size each, corrected
438 # for metadata and tar block overhead
439 fsiz = int ( ( TEST_VOLSIZ
440 / (TEST_FILESPERVOL * VOLUME_OVERHEAD))
442 fcnt = (self.VOLUMES - 1) * TEST_FILESPERVOL
443 for i in range (fcnt):
444 nvol, invol = divmod(i, TEST_FILESPERVOL)
445 f = "dummy_vol_%d_n_%0.2d" % (nvol, invol)
446 self.hash [f] = self.create_file ("%s/%s"
447 % (self.src_path, f),
451 vname = partial (self.default_volume_name, backup_file)
452 dtar = deltatar.DeltaTar (mode=mode,
454 password=self.PASSWORD,
455 index_name_func=lambda _: index_file,
456 volume_name_func=vname)
458 dtar.create_full_backup \
459 (source_path=self.src_path, backup_path=bak_path,
462 if self.PASSWORD is not None:
463 # ensure all files are at least superficially in PDT format
464 for f in os.listdir (bak_path):
465 assert is_pdt_encrypted ("%s/%s" % (bak_path, f))
467 # first restore must succeed
468 dtar.restore_backup(target_path=self.dst_path,
469 backup_indexes_paths=[
470 "%s/%s" % (bak_path, index_file)
472 for key, value in self.hash.items ():
473 f = "%s/%s" % (self.dst_path, key)
474 assert os.path.exists (f)
475 assert value == self.md5sum (f)
476 shutil.rmtree (self.dst_path)
477 shutil.rmtree (self.src_path)
479 self.CORRUPT (backup_full,
480 self.COMPRESSION is not None,
481 self.PASSWORD is not None)
483 # normal restore must fail
485 dtar.restore_backup(target_path=self.dst_path,
486 backup_tar_path=backup_full)
487 except tarfile.CompressionError:
488 if self.PASSWORD is not None or self.COMPRESSION is not None:
492 except tarfile.ReadError:
493 # can happen with all three modes
495 except tarfile.DecryptionError:
496 if self.PASSWORD is not None:
501 os.chdir (self.pwd) # not restored due to the error above
502 # but recover will succeed
503 failed = dtar.rescue_backup(target_path=self.dst_path,
504 backup_tar_path=backup_full)
505 # with one file missing
508 for key, value in self.hash.items ():
509 kkey = "%s/%s" % (self.dst_path, key)
510 if os.path.exists (kkey):
511 if value != self.md5sum (kkey):
512 mismatch.append (key)
516 assert len (failed) == self.FAILURES
517 assert len (missing) == (self.MISSING if self.MISSING is not None
519 assert len (mismatch) == self.MISMATCHES
521 shutil.rmtree (self.dst_path)
524 class GenIndexTest (DefectiveTest):
526 Deducing an index for a backup with tarfile.
529 def test_gen_index (self):
531 Create backup, leave it unharmed, then generate an index.
533 mode = self.COMPRESSION or "#"
534 bak_path, backup_file, backup_full, index_file = \
535 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
537 vname = partial (self.default_volume_name, backup_file)
538 dtar = deltatar.DeltaTar (mode=mode,
540 password=self.PASSWORD,
541 index_name_func=lambda _: index_file,
542 volume_name_func=vname)
544 dtar.create_full_backup \
545 (source_path=self.src_path, backup_path=bak_path,
548 def gen_volume_name (nvol):
549 return os.path.join (bak_path, vname (backup_full, True, nvol))
551 psidx = tarfile.gen_rescue_index (gen_volume_name,
553 password=self.PASSWORD)
555 assert len (psidx) == len (self.hash)
558 ###############################################################################
560 ###############################################################################
562 class RecoverCorruptPayloadTestBase (RecoverTest):
565 FAILURES = 0 # tarfile will restore but corrupted, as
566 MISMATCHES = 1 # revealed by the hash
568 class RecoverCorruptPayloadSingleTest (RecoverCorruptPayloadTestBase):
571 class RecoverCorruptPayloadMultiTest (RecoverCorruptPayloadTestBase):
575 class RecoverCorruptPayloadGZTestBase (RecoverTest):
581 class RecoverCorruptPayloadGZSingleTest (RecoverCorruptPayloadGZTestBase):
584 class RecoverCorruptPayloadGZMultiTest (RecoverCorruptPayloadGZTestBase):
588 class RecoverCorruptPayloadGZAESTestBase (RecoverTest):
590 PASSWORD = TEST_PASSWORD
594 class RecoverCorruptPayloadGZAESSingleTest (RecoverCorruptPayloadGZAESTestBase):
597 class RecoverCorruptPayloadGZAESMultiTest (RecoverCorruptPayloadGZAESTestBase):
601 class RecoverCorruptHeaderTestBase (RecoverTest):
605 CORRUPT = corrupt_header
608 class RecoverCorruptHeaderSingleTest (RecoverCorruptHeaderTestBase):
611 class RecoverCorruptHeaderMultiTest (RecoverCorruptHeaderTestBase):
615 class RecoverCorruptHeaderGZTestBase (RecoverTest):
619 CORRUPT = corrupt_header
622 class RecoverCorruptHeaderGZSingleTest (RecoverCorruptHeaderGZTestBase):
625 class RecoverCorruptHeaderGZMultiTest (RecoverCorruptHeaderGZTestBase):
629 class RecoverCorruptHeaderGZAESTestBase (RecoverTest):
631 PASSWORD = TEST_PASSWORD
633 CORRUPT = corrupt_header
636 class RecoverCorruptHeaderGZAESSingleTest (RecoverCorruptHeaderGZAESTestBase):
639 class RecoverCorruptHeaderGZAESMultiTest (RecoverCorruptHeaderGZAESTestBase):
643 class RecoverCorruptEntireHeaderTestBase (RecoverTest):
647 CORRUPT = corrupt_entire_header
650 class RecoverCorruptEntireHeaderSingleTest (RecoverCorruptEntireHeaderTestBase):
653 class RecoverCorruptEntireHeaderMultiTest (RecoverCorruptEntireHeaderTestBase):
657 class RecoverCorruptEntireHeaderGZTestBase (RecoverTest):
661 CORRUPT = corrupt_entire_header
664 class RecoverCorruptEntireHeaderGZSingleTest (RecoverCorruptEntireHeaderGZTestBase):
667 class RecoverCorruptEntireHeaderGZMultiTest (RecoverCorruptEntireHeaderGZTestBase):
671 class RecoverCorruptEntireHeaderGZAESTestBase (RecoverTest):
673 PASSWORD = TEST_PASSWORD
675 CORRUPT = corrupt_entire_header
678 class RecoverCorruptEntireHeaderGZAESSingleTest (RecoverCorruptEntireHeaderGZAESTestBase):
681 class RecoverCorruptEntireHeaderGZAESMultiTest (RecoverCorruptEntireHeaderGZAESTestBase):
685 class RecoverCorruptTrailingDataTestBase (RecoverTest):
    # plain Tar is indifferent to trailing data and the results
691 CORRUPT = corrupt_trailing_data
694 class RecoverCorruptTrailingDataSingleTest (RecoverCorruptTrailingDataTestBase):
697 class RecoverCorruptTrailingDataMultiTest (RecoverCorruptTrailingDataTestBase):
698 # the last object in first archive has extra bytes somewhere in the
699 # middle because tar itself performs no data checksumming.
704 class RecoverCorruptTrailingDataGZTestBase (RecoverTest):
705 # reading past the final object will cause decompression failure;
706 # all objects except for the last survive unharmed though
710 CORRUPT = corrupt_trailing_data
713 class RecoverCorruptTrailingDataGZSingleTest (RecoverCorruptTrailingDataGZTestBase):
716 class RecoverCorruptTrailingDataGZMultiTest (RecoverCorruptTrailingDataGZTestBase):
718 # the last file of the first volume will only contain the data of the
719 # second part which is contained in the second volume. this happens
720 # because the CRC32 is wrong for the first part so it gets discarded, then
721 # the object is recreated from the first header of the second volume,
722 # containing only the remainder of the data.
727 class RecoverCorruptTrailingDataGZAESTestBase (RecoverTest):
729 PASSWORD = TEST_PASSWORD
731 CORRUPT = corrupt_trailing_data
734 class RecoverCorruptTrailingDataGZAESSingleTest (RecoverCorruptTrailingDataGZAESTestBase):
737 class RecoverCorruptTrailingDataGZAESMultiTest (RecoverCorruptTrailingDataGZAESTestBase):
741 class RecoverCorruptVolumeBaseTest (RecoverTest):
745 CORRUPT = corrupt_volume
748 class RecoverCorruptVolumeTest (RecoverCorruptVolumeBaseTest):
751 class RecoverCorruptVolumeGZTest (RecoverCorruptVolumeBaseTest):
754 class RecoverCorruptVolumeGZAESTest (RecoverCorruptVolumeBaseTest):
756 PASSWORD = TEST_PASSWORD
759 class RecoverCorruptHoleBaseTest (RecoverTest):
761 Cut bytes from the middle of a volume.
763 Index-based recovery works only up to the hole.
768 CORRUPT = corrupt_hole
769 VOLUMES = 2 # request two vols to swell up the first one
772 class RecoverCorruptHoleTest (RecoverCorruptHoleBaseTest):
775 class RecoverCorruptHoleGZTest (RecoverCorruptHoleBaseTest):
779 class RecoverCorruptHoleGZAESTest (RecoverCorruptHoleBaseTest):
781 PASSWORD = TEST_PASSWORD
784 ###############################################################################
786 ###############################################################################
788 class RescueCorruptHoleBaseTest (RescueTest):
790 Cut bytes from the middle of a volume.
795 CORRUPT = corrupt_hole
796 VOLUMES = 2 # request two vols to swell up the first one
797 MISMATCHES = 2 # intersected by hole
798 MISSING = 1 # excised by hole
800 class RescueCorruptHoleTest (RescueCorruptHoleBaseTest):
803 class RescueCorruptHoleGZTest (RescueCorruptHoleBaseTest):
805 # the decompressor explodes in our face processing the first dummy, nothing
806 # we can do to recover
809 class RescueCorruptHoleGZAESTest (RescueCorruptHoleBaseTest):
811 PASSWORD = TEST_PASSWORD
812 # again, ignoring the crypto errors yields a bad zlib stream causing the
813 # decompressor to abort where the hole begins; the file is extracted up
814 # to this point though
818 class RescueCorruptHeaderCTSizeGZAESTest (RescueTest):
820 PASSWORD = TEST_PASSWORD
822 CORRUPT = corrupt_ctsize
826 class RescueCorruptLeadingGarbageTestBase (RescueTest):
    # plain Tar is indifferent to trailing data and the results
832 CORRUPT = corrupt_leading_garbage
835 class RescueCorruptLeadingGarbageSingleTest (RescueCorruptLeadingGarbageTestBase):
838 class RescueCorruptLeadingGarbageMultiTest (RescueCorruptLeadingGarbageTestBase):
839 # the last object in first archive has extra bytes somewhere in the
840 # middle because tar itself performs no data checksumming.
845 ###############################################################################
847 ###############################################################################
849 class GenIndexIntactBaseTest (GenIndexTest):
860 class GenIndexIntactTest (GenIndexIntactBaseTest):
863 class GenIndexIntactGZTest (GenIndexIntactBaseTest):
867 class GenIndexIntactGZAESTest (GenIndexIntactBaseTest):
869 PASSWORD = TEST_PASSWORD