6 from functools import partial
8 import deltatar.deltatar as deltatar
9 import deltatar.crypto as crypto
10 import deltatar.tarfile as tarfile
12 from . import BaseTest
14 TEST_PASSWORD = "test1234"
17 VOLUME_OVERHEAD = 1.4 # account for tar overhead when fitting files into
18 # volumes; this is black magic
21 ###############################################################################
23 ###############################################################################
def flip_bits (fname, off, b=0x01, n=1):
    """
    Open file *fname* at offset *off*, replacing the next *n* bytes with
    their values xor'ed with *b*.

    :param fname: path of the file to modify in place
    :param off:   offset of the first byte to alter
    :param b:     mask xor'ed onto every affected byte
    :param n:     number of consecutive bytes to alter

    If fewer than *n* bytes are available at *off*, only those present are
    altered.
    """
    fd = os.open (fname, os.O_RDWR)

    try:
        pos = os.lseek (fd, off, os.SEEK_SET)
        assert pos == off
        chunk = os.read (fd, n)
        chunk = bytes (v ^ b for v in chunk)
        # reading advanced the file position; rewind before writing back
        pos = os.lseek (fd, off, os.SEEK_SET)
        assert pos == off
        os.write (fd, chunk)
    finally:
        # release the descriptor even if a sanity check above trips
        os.close (fd)
def gz_header_size (fname, off=0):
    """
    Determine the length of the gzip header starting at *off* in file fname.

    The header is variable length because it may contain the filename as NUL
    terminated bytes. The scan starts right behind the fixed size part of the
    header (``tarfile.GZ_HEADER_SIZE`` bytes) and stops at the first NUL byte.

    :param fname: path of the file to inspect
    :param off:   offset in *fname* at which the gzip header begins
    :returns:     number of bytes between *off* and the terminating NUL
    :raises ValueError: if the file ends before a NUL byte is found

    NOTE(review): the terminating NUL itself is not counted, matching a scan
    that stops on the terminator -- confirm against the callers.
    """
    # honor the caller supplied start instead of overwriting it; with the
    # default of zero this is the plain fixed header size
    pos = off + tarfile.GZ_HEADER_SIZE

    with open (fname, "rb") as st:
        st.seek (pos)
        while True:
            cur = st.read (1)
            if not cur:
                raise ValueError ("%s: EOF while scanning gzip header at "
                                  "offset %d" % (fname, pos))
            if cur [0] == 0:
                break
            pos += 1

    return pos - off
68 def is_pdt_encrypted (fname):
70 Returns true if the file contains at least one PDT header plus enough
74 with open (fname, "rb") as st:
75 hdr = crypto.hdr_read_stream (st)
77 assert (len (st.read (siz)) == siz)
78 except Exception as exn:
83 ###############################################################################
84 ## corruption simulators ##
85 ###############################################################################
class UndefinedTest (Exception):
    """
    Raised when no test is available for the requested combination of
    parameters.
    """
90 def corrupt_header (_, fname, compress, encrypt):
92 Modify a significant byte in the object header of the format.
94 if encrypt is True: # damage GCM tag
95 flip_bits (fname, crypto.HDR_OFF_TAG + 1)
96 elif compress is True: # invalidate magic
98 else: # Fudge checksum. From tar(5):
100 # struct header_gnu_tar {
109 flip_bits (fname, 100 + 8 + 8 + 8 + 12 + 12 + 1)
def corrupt_ctsize (_, fname, compress, encrypt):
    """
    Blow up the size of an object so as to cause its apparent payload to leak
    into the objects that follow it.

    Only defined for encrypted archives, as the ciphertext size field is read
    from the crypto header.

    :raises UndefinedTest: if *encrypt* is not True
    """
    if encrypt is True:
        # damage lowest bit of second least significant byte of size field;
        # this effectively sets the ciphertext size to 422, causing it to
        # extend over the next object into the third one.
        return flip_bits (fname, crypto.HDR_OFF_CTSIZE + 1, b=0x01)
    raise UndefinedTest ("corrupt_ctsize %s %s %s" % (fname, compress, encrypt))
def corrupt_entire_header (_, fname, compress, encrypt):
    """
    Flip all bits in the first object header.

    The number of bytes inverted from the start of the file depends on the
    format: the crypto header size if *encrypt* is set, else the length
    reported by gz_header_size() if *compress* is set, else one tar block.
    """
    if encrypt is True: # crypto header
        flip_bits (fname, 0, 0xff, crypto.PDTCRYPT_HDR_SIZE)
    elif compress is True: # gzip header, length determined from the file
        flip_bits (fname, 0, 0xff, gz_header_size (fname))
    else: # tar header block
        flip_bits (fname, 0, 0xff, tarfile.BLOCKSIZE)
def corrupt_payload_start (_, fname, compress, encrypt):
    """
    Modify a byte just behind the object header structure of the format.

    The lowest bit of the byte at offset *header size + 1* is flipped, where
    the header size is that of the crypto header if *encrypt* is set, else the
    length reported by gz_header_size() if *compress* is set, else one tar
    block.
    """
    if encrypt is True:
        flip_bits (fname, crypto.PDTCRYPT_HDR_SIZE + 1)
    elif compress is True:
        flip_bits (fname, gz_header_size (fname) + 1)
    else:
        flip_bits (fname, tarfile.BLOCKSIZE + 1)
def corrupt_leading_garbage (_, fname, compress, encrypt):
    """
    Prepend junk to file.

    A new file consisting of 512 random bytes followed by the complete
    contents of *fname* is assembled next to the original, then moved over it
    atomically.

    :param fname: path of the file to damage
    """
    import shutil
    import tempfile

    aname = os.path.abspath (fname)
    junk  = os.urandom (512) # tar block sized

    # temporary file in the same directory so the final rename cannot cross
    # file systems; mkstemp() creates it readable and writable by the owner
    # only
    tmpfd, tmpname = tempfile.mkstemp (dir=os.path.dirname (aname))

    try:
        with os.fdopen (tmpfd, "wb") as out, open (aname, "rb") as inp:
            size = os.fstat (inp.fileno ()).st_size
            out.write (junk) # junk first
            shutil.copyfileobj (inp, out) # then the original data
            assert out.tell () == len (junk) + size
        # install the new file in its place, atomically
        os.replace (tmpname, aname)
    except BaseException:
        # do not leave the partially written copy behind
        try:
            os.unlink (tmpname)
        except OSError:
            pass
        raise
def corrupt_trailing_data (_, fname, compress, encrypt):
    """
    Append junk to file.

    42 random bytes are written behind the current end of *fname*; the
    existing contents are left untouched.
    """
    junk = os.urandom (42)
    fd = os.open (fname, os.O_WRONLY | os.O_APPEND)

    try:
        os.write (fd, junk)
    finally:
        os.close (fd)
def corrupt_volume (_, fname, compress, encrypt):
    """
    Zero out an entire volume.

    The file keeps its size; every byte is overwritten with NUL in chunks of
    at most TEST_BLOCKSIZE bytes.
    """
    fd = os.open (fname, os.O_WRONLY)

    try:
        size = os.lseek (fd, 0, os.SEEK_END)
        assert os.lseek (fd, 0, os.SEEK_SET) == 0
        zeros = bytes (TEST_BLOCKSIZE)
        while size > 0:
            todo = min (size, TEST_BLOCKSIZE)
            # account for what was actually written to survive short writes
            size -= os.write (fd, zeros [:todo])
    finally:
        os.close (fd)
207 def corrupt_hole (_, fname, compress, encrypt):
209 Cut file in three pieces, reassemble without the middle one.
211 aname = os.path.abspath (fname)
212 infd = os.open (fname, os.O_RDONLY)
213 size = os.lseek (infd, 0, os.SEEK_END)
214 assert os.lseek (infd, 0, os.SEEK_SET) == 0
215 assert size > 3 * TEST_BLOCKSIZE
216 hole = (size / 3, size * 2 / 3)
217 outfd = os.open (os.path.dirname (aname), os.O_WRONLY | os.O_TMPFILE,
218 stat.S_IRUSR | stat.S_IWUSR)
222 data = os.read (infd, TEST_BLOCKSIZE)
223 if done < hole [0] or hole [1] < done:
224 # only copy from outside hole
225 os.write (outfd, data)
231 path = "/proc/self/fd/%d" % outfd
232 os.link (path, aname, src_dir_fd=0, follow_symlinks=True)
235 def immaculate (_, _fname, _compress, _encrypt):
241 ###############################################################################
243 ###############################################################################
245 class DefectiveTest (BaseTest):
247 Disaster recovery: restore corrupt backups.
252 FAILURES = 0 # files that could not be restored
253 MISMATCHES = 0 # files that were restored but corrupted
254 CORRUPT = corrupt_payload_start
256 MISSING = None # normally the number of failures
261 Create base test data
263 self.pwd = os.getcwd()
264 self.dst_path = "source_dir"
265 self.src_path = "%s2" % self.dst_path
268 os.system('rm -rf target_dir source_dir* backup_dir* huge')
269 os.makedirs (self.src_path)
273 self.hash [f] = self.create_file ("%s/%s"
274 % (self.src_path, f), 5 + i)
279 Remove temporary files created by unit tests and reset globals.
282 os.system("rm -rf source_dir source_dir2 backup_dir*")
286 def default_volume_name (backup_file, _x, _y, n, *a, **kwa):
287 return backup_file % n
289 def gen_file_names (self, comp, pw):
290 bak_path = "backup_dir"
291 backup_file = "the_full_backup_%0.2d.tar"
292 backup_full = ("%s/%s" % (bak_path, backup_file)) % 0
293 index_file = "the_full_index"
295 if self.COMPRESSION is not None:
300 if self.PASSWORD is not None:
301 backup_file = "%s.%s" % (backup_file, deltatar.PDTCRYPT_EXTENSION)
302 backup_full = "%s.%s" % (backup_full, deltatar.PDTCRYPT_EXTENSION)
303 index_file = "%s.%s" % (index_file , deltatar.PDTCRYPT_EXTENSION)
305 return bak_path, backup_file, backup_full, index_file
308 class RecoverTest (DefectiveTest):
310 Recover: restore corrupt backups from index file information.
313 def test_recover_corrupt (self):
315 Perform various damaging actions that cause unreadable objects.
317 Expects the extraction to fail in normal mode. With disaster recovery,
318 extraction must succeed, and exactly one file must be missing.
320 mode = self.COMPRESSION or "#"
321 bak_path, backup_file, backup_full, index_file = \
322 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
325 # add n files for one nth the volume size each, corrected
326 # for metadata and tar block overhead
327 fsiz = int ( ( TEST_VOLSIZ
328 / (TEST_FILESPERVOL * VOLUME_OVERHEAD))
330 fcnt = (self.VOLUMES - 1) * TEST_FILESPERVOL
331 for i in range (fcnt):
332 nvol, invol = divmod(i, TEST_FILESPERVOL)
333 f = "dummy_vol_%d_n_%0.2d" % (nvol, invol)
334 self.hash [f] = self.create_file ("%s/%s"
335 % (self.src_path, f),
339 vname = partial (self.default_volume_name, backup_file)
340 dtar = deltatar.DeltaTar (mode=mode,
342 password=self.PASSWORD,
343 index_name_func=lambda _: index_file,
344 volume_name_func=vname)
346 dtar.create_full_backup \
347 (source_path=self.src_path, backup_path=bak_path,
350 if self.PASSWORD is not None:
351 # ensure all files are at least superficially in PDT format
352 for f in os.listdir (bak_path):
353 assert is_pdt_encrypted ("%s/%s" % (bak_path, f))
355 # first restore must succeed
356 dtar.restore_backup(target_path=self.dst_path,
357 backup_indexes_paths=[
358 "%s/%s" % (bak_path, index_file)
360 for key, value in self.hash.items ():
361 f = "%s/%s" % (self.dst_path, key)
362 assert os.path.exists (f)
363 assert value == self.md5sum (f)
364 shutil.rmtree (self.dst_path)
365 shutil.rmtree (self.src_path)
367 self.CORRUPT (backup_full,
368 self.COMPRESSION is not None,
369 self.PASSWORD is not None)
371 # normal restore must fail
373 dtar.restore_backup(target_path=self.dst_path,
374 backup_tar_path=backup_full)
375 except tarfile.CompressionError:
376 if self.PASSWORD is not None or self.COMPRESSION is not None:
380 except tarfile.ReadError:
381 # can happen with all three modes
383 except tarfile.DecryptionError:
384 if self.PASSWORD is not None:
389 os.chdir (self.pwd) # not restored due to the error above
390 # but recover will succeed
391 failed = dtar.recover_backup(target_path=self.dst_path,
392 backup_indexes_paths=[
393 "%s/%s" % (bak_path, index_file)
396 assert len (failed) == self.FAILURES
398 # with one file missing
401 for key, value in self.hash.items ():
402 kkey = "%s/%s" % (self.dst_path, key)
403 if os.path.exists (kkey):
404 if value != self.md5sum (kkey):
405 mismatch.append (key)
409 # usually, an object whose extraction fails will not be found on
410 # disk afterwards so the number of failures equals that of missing
411 # files. however, some modes will create partial files for objects
412 # spanning multiple volumes that contain the parts whose checksums
414 assert len (missing) == (self.MISSING if self.MISSING is not None
416 assert len (mismatch) == self.MISMATCHES
418 shutil.rmtree (self.dst_path)
421 class RescueTest (DefectiveTest):
423 Rescue: restore corrupt backups from backup set that is damaged to a degree
424 that the index file is worthless.
427 def test_rescue_corrupt (self):
429 Perform various damaging actions that cause unreadable objects, then
430 attempt to extract objects regardless.
432 mode = self.COMPRESSION or "#"
433 bak_path, backup_file, backup_full, index_file = \
434 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
437 # add n files for one nth the volume size each, corrected
438 # for metadata and tar block overhead
439 fsiz = int ( ( TEST_VOLSIZ
440 / (TEST_FILESPERVOL * VOLUME_OVERHEAD))
442 fcnt = (self.VOLUMES - 1) * TEST_FILESPERVOL
443 for i in range (fcnt):
444 nvol, invol = divmod(i, TEST_FILESPERVOL)
445 f = "dummy_vol_%d_n_%0.2d" % (nvol, invol)
446 self.hash [f] = self.create_file ("%s/%s"
447 % (self.src_path, f),
451 vname = partial (self.default_volume_name, backup_file)
452 dtar = deltatar.DeltaTar (mode=mode,
454 password=self.PASSWORD,
455 index_name_func=lambda _: index_file,
456 volume_name_func=vname)
458 dtar.create_full_backup \
459 (source_path=self.src_path, backup_path=bak_path,
462 if self.PASSWORD is not None:
463 # ensure all files are at least superficially in PDT format
464 for f in os.listdir (bak_path):
465 assert is_pdt_encrypted ("%s/%s" % (bak_path, f))
467 # first restore must succeed
468 dtar.restore_backup(target_path=self.dst_path,
469 backup_indexes_paths=[
470 "%s/%s" % (bak_path, index_file)
472 for key, value in self.hash.items ():
473 f = "%s/%s" % (self.dst_path, key)
474 assert os.path.exists (f)
475 assert value == self.md5sum (f)
476 shutil.rmtree (self.dst_path)
477 shutil.rmtree (self.src_path)
479 self.CORRUPT (backup_full,
480 self.COMPRESSION is not None,
481 self.PASSWORD is not None)
483 # normal restore must fail
485 dtar.restore_backup(target_path=self.dst_path,
486 backup_tar_path=backup_full)
487 except tarfile.CompressionError:
488 if self.PASSWORD is not None or self.COMPRESSION is not None:
492 except tarfile.ReadError:
493 # can happen with all three modes
495 except tarfile.DecryptionError:
496 if self.PASSWORD is not None:
501 os.chdir (self.pwd) # not restored due to the error above
502 # but rescue will succeed
503 failed = dtar.rescue_backup(target_path=self.dst_path,
504 backup_tar_path=backup_full)
505 # with one file missing
508 for key, value in self.hash.items ():
509 kkey = "%s/%s" % (self.dst_path, key)
510 if os.path.exists (kkey):
511 if value != self.md5sum (kkey):
512 mismatch.append (key)
516 assert len (failed) == self.FAILURES
517 assert len (missing) == (self.MISSING if self.MISSING is not None
519 assert len (mismatch) == self.MISMATCHES
521 shutil.rmtree (self.dst_path)
524 class GenIndexTest (DefectiveTest):
526 Deducing an index for a backup with tarfile.
529 def test_gen_index (self):
531 Create backup, leave it unharmed, then generate an index.
533 mode = self.COMPRESSION or "#"
534 bak_path, backup_file, backup_full, index_file = \
535 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
537 vname = partial (self.default_volume_name, backup_file)
538 dtar = deltatar.DeltaTar (mode=mode,
540 password=self.PASSWORD,
541 index_name_func=lambda _: index_file,
542 volume_name_func=vname)
544 dtar.create_full_backup \
545 (source_path=self.src_path, backup_path=bak_path,
548 psidx = tarfile.gen_rescue_index (backup_full, mode, password=self.PASSWORD)
550 assert len (psidx) == len (self.hash)
553 ###############################################################################
555 ###############################################################################
557 class RecoverCorruptPayloadTestBase (RecoverTest):
560 FAILURES = 0 # tarfile will restore but corrupted, as
561 MISMATCHES = 1 # revealed by the hash
563 class RecoverCorruptPayloadSingleTest (RecoverCorruptPayloadTestBase):
566 class RecoverCorruptPayloadMultiTest (RecoverCorruptPayloadTestBase):
570 class RecoverCorruptPayloadGZTestBase (RecoverTest):
576 class RecoverCorruptPayloadGZSingleTest (RecoverCorruptPayloadGZTestBase):
579 class RecoverCorruptPayloadGZMultiTest (RecoverCorruptPayloadGZTestBase):
583 class RecoverCorruptPayloadGZAESTestBase (RecoverTest):
585 PASSWORD = TEST_PASSWORD
589 class RecoverCorruptPayloadGZAESSingleTest (RecoverCorruptPayloadGZAESTestBase):
592 class RecoverCorruptPayloadGZAESMultiTest (RecoverCorruptPayloadGZAESTestBase):
596 class RecoverCorruptHeaderTestBase (RecoverTest):
600 CORRUPT = corrupt_header
603 class RecoverCorruptHeaderSingleTest (RecoverCorruptHeaderTestBase):
606 class RecoverCorruptHeaderMultiTest (RecoverCorruptHeaderTestBase):
610 class RecoverCorruptHeaderGZTestBase (RecoverTest):
614 CORRUPT = corrupt_header
617 class RecoverCorruptHeaderGZSingleTest (RecoverCorruptHeaderGZTestBase):
620 class RecoverCorruptHeaderGZMultiTest (RecoverCorruptHeaderGZTestBase):
624 class RecoverCorruptHeaderGZAESTestBase (RecoverTest):
626 PASSWORD = TEST_PASSWORD
628 CORRUPT = corrupt_header
631 class RecoverCorruptHeaderGZAESSingleTest (RecoverCorruptHeaderGZAESTestBase):
634 class RecoverCorruptHeaderGZAESMultiTest (RecoverCorruptHeaderGZAESTestBase):
638 class RecoverCorruptEntireHeaderTestBase (RecoverTest):
642 CORRUPT = corrupt_entire_header
645 class RecoverCorruptEntireHeaderSingleTest (RecoverCorruptEntireHeaderTestBase):
648 class RecoverCorruptEntireHeaderMultiTest (RecoverCorruptEntireHeaderTestBase):
652 class RecoverCorruptEntireHeaderGZTestBase (RecoverTest):
656 CORRUPT = corrupt_entire_header
659 class RecoverCorruptEntireHeaderGZSingleTest (RecoverCorruptEntireHeaderGZTestBase):
662 class RecoverCorruptEntireHeaderGZMultiTest (RecoverCorruptEntireHeaderGZTestBase):
666 class RecoverCorruptEntireHeaderGZAESTestBase (RecoverTest):
668 PASSWORD = TEST_PASSWORD
670 CORRUPT = corrupt_entire_header
673 class RecoverCorruptEntireHeaderGZAESSingleTest (RecoverCorruptEntireHeaderGZAESTestBase):
676 class RecoverCorruptEntireHeaderGZAESMultiTest (RecoverCorruptEntireHeaderGZAESTestBase):
680 class RecoverCorruptTrailingDataTestBase (RecoverTest):
681 # plain Tar is indifferent against trailing data and the results
686 CORRUPT = corrupt_trailing_data
689 class RecoverCorruptTrailingDataSingleTest (RecoverCorruptTrailingDataTestBase):
692 class RecoverCorruptTrailingDataMultiTest (RecoverCorruptTrailingDataTestBase):
693 # the last object in first archive has extra bytes somewhere in the
694 # middle because tar itself performs no data checksumming.
699 class RecoverCorruptTrailingDataGZTestBase (RecoverTest):
700 # reading past the final object will cause decompression failure;
701 # all objects except for the last survive unharmed though
705 CORRUPT = corrupt_trailing_data
708 class RecoverCorruptTrailingDataGZSingleTest (RecoverCorruptTrailingDataGZTestBase):
711 class RecoverCorruptTrailingDataGZMultiTest (RecoverCorruptTrailingDataGZTestBase):
713 # the last file of the first volume will only contain the data of the
714 # second part which is contained in the second volume. this happens
715 # because the CRC32 is wrong for the first part so it gets discarded, then
716 # the object is recreated from the first header of the second volume,
717 # containing only the remainder of the data.
722 class RecoverCorruptTrailingDataGZAESTestBase (RecoverTest):
724 PASSWORD = TEST_PASSWORD
726 CORRUPT = corrupt_trailing_data
729 class RecoverCorruptTrailingDataGZAESSingleTest (RecoverCorruptTrailingDataGZAESTestBase):
732 class RecoverCorruptTrailingDataGZAESMultiTest (RecoverCorruptTrailingDataGZAESTestBase):
736 class RecoverCorruptVolumeBaseTest (RecoverTest):
740 CORRUPT = corrupt_volume
743 class RecoverCorruptVolumeTest (RecoverCorruptVolumeBaseTest):
746 class RecoverCorruptVolumeGZTest (RecoverCorruptVolumeBaseTest):
749 class RecoverCorruptVolumeGZAESTest (RecoverCorruptVolumeBaseTest):
751 PASSWORD = TEST_PASSWORD
754 class RecoverCorruptHoleBaseTest (RecoverTest):
756 Cut bytes from the middle of a volume.
758 Index-based recovery works only up to the hole.
763 CORRUPT = corrupt_hole
764 VOLUMES = 2 # request two vols to swell up the first one
767 class RecoverCorruptHoleTest (RecoverCorruptHoleBaseTest):
770 class RecoverCorruptHoleGZTest (RecoverCorruptHoleBaseTest):
774 class RecoverCorruptHoleGZAESTest (RecoverCorruptHoleBaseTest):
776 PASSWORD = TEST_PASSWORD
779 ###############################################################################
781 ###############################################################################
783 class RescueCorruptHoleBaseTest (RescueTest):
785 Cut bytes from the middle of a volume.
790 CORRUPT = corrupt_hole
791 VOLUMES = 2 # request two vols to swell up the first one
792 MISMATCHES = 2 # intersected by hole
793 MISSING = 1 # excised by hole
795 class RescueCorruptHoleTest (RescueCorruptHoleBaseTest):
798 class RescueCorruptHoleGZTest (RescueCorruptHoleBaseTest):
800 # the decompressor explodes in our face processing the first dummy, nothing
801 # we can do to recover
804 class RescueCorruptHoleGZAESTest (RescueCorruptHoleBaseTest):
806 PASSWORD = TEST_PASSWORD
807 # again, ignoring the crypto errors yields a bad zlib stream causing the
808 # decompressor to abort where the hole begins; the file is extracted up
809 # to this point though
813 class RescueCorruptHeaderCTSizeGZAESTest (RescueTest):
815 PASSWORD = TEST_PASSWORD
817 CORRUPT = corrupt_ctsize
821 class RescueCorruptLeadingGarbageTestBase (RescueTest):
822 # plain Tar is indifferent against trailing data and the results
827 CORRUPT = corrupt_leading_garbage
830 class RescueCorruptLeadingGarbageSingleTest (RescueCorruptLeadingGarbageTestBase):
833 class RescueCorruptLeadingGarbageMultiTest (RescueCorruptLeadingGarbageTestBase):
834 # the last object in first archive has extra bytes somewhere in the
835 # middle because tar itself performs no data checksumming.
840 ###############################################################################
842 ###############################################################################
844 class GenIndexIntactBaseTest (GenIndexTest):
855 class GenIndexIntactTest (GenIndexIntactBaseTest):
858 class GenIndexIntactGZTest (GenIndexIntactBaseTest):
862 class GenIndexIntactGZAESTest (GenIndexIntactBaseTest):
864 PASSWORD = TEST_PASSWORD