6 from functools import partial
8 import deltatar.deltatar as deltatar
9 import deltatar.crypto as crypto
10 import deltatar.tarfile as tarfile
12 from . import BaseTest
14 TEST_PASSWORD = "test1234"
17 VOLUME_OVERHEAD = 1.4 # account for tar overhead when fitting files into
18 # volumes; this is black magic
21 ###############################################################################
23 ###############################################################################
def flip_bits (fname, off, b=0x01, n=1):
    """
    Open file *fname* at offset *off*, replacing the next *n* bytes with
    their values xor'ed with *b*.

    :param fname: path of the file to modify in place
    :param off:   offset of the first byte to modify
    :param b:     mask that is xor'ed onto every affected byte
    :param n:     number of consecutive bytes to modify; if the file ends
                  earlier, only the bytes actually present are rewritten
    """
    fd = os.open (fname, os.O_RDWR)

    try:
        pos = os.lseek (fd, off, os.SEEK_SET)
        assert pos == off
        chunk = os.read (fd, n)
        chunk = bytes (v ^ b for v in chunk)
        # the read advanced the file position; rewind so the modified bytes
        # overwrite exactly the ones that were read
        pos = os.lseek (fd, off, os.SEEK_SET)
        assert pos == off
        os.write (fd, chunk)
    finally:
        # release the descriptor even if seeking or I/O failed
        os.close (fd)
def gz_header_size (fname, off=0):
    """
    Determine the length of the gzip header starting at *off* in file fname.

    The header is variable length because it may contain the filename as NUL
    terminated bytes, so the file has to be scanned for the terminator to
    find out where the actual payload starts.

    :param fname: path of the file to inspect
    :param off:   offset at which the gzip header begins
    :returns:     number of bytes between *off* and the NUL byte terminating
                  the file name (the terminator itself is not counted)
    :raises ValueError: if the file ends before a NUL byte is found
    """
    # skip the fixed part of the header; presumably tarfile.GZ_HEADER_SIZE
    # is the size of that part -- TODO confirm against deltatar.tarfile
    pos = off + tarfile.GZ_HEADER_SIZE
    fd = os.open (fname, os.O_RDONLY)

    try:
        assert os.lseek (fd, pos, os.SEEK_SET) == pos
        while True:
            byte = os.read (fd, 1)
            if not byte:
                raise ValueError ("%s: EOF while scanning gzip header"
                                  % fname)
            if byte [0] == 0:
                break
            pos += 1 # consecutive reads advance the file position
    finally:
        os.close (fd)

    return pos - off
68 def is_pdt_encrypted (fname):
70 Returns true if the file contains at least one PDT header plus enough
74 with open (fname, "rb") as st:
75 hdr = crypto.hdr_read_stream (st)
77 assert (len (st.read (siz)) == siz)
78 except Exception as exn:
83 ###############################################################################
84 ## corruption simulators ##
85 ###############################################################################
class UndefinedTest (Exception):
    """
    No test available for the asked combination of parameters.

    Raised by corruption simulators that cannot damage a backup created
    with the requested compression / encryption settings.
    """
def corrupt_header (_, fname, compress, encrypt):
    """
    Modify a significant byte in the object header of the format.

    :param _:        ignored; receives the test instance because the
                     function is invoked through the ``CORRUPT`` class
                     attribute
    :param fname:    path of the backup volume to damage
    :param compress: whether the volume is gzip compressed
    :param encrypt:  whether the volume is encrypted; takes precedence over
                     *compress* since the encryption header comes first
    """
    if encrypt is True: # damage GCM tag
        flip_bits (fname, crypto.HDR_OFF_TAG + 1)
    elif compress is True: # invalidate magic
        # the gzip magic occupies the first two bytes of the member
        flip_bits (fname, 1)
    else: # Fudge checksum. From tar(5):
        # the checksum field is preceded by name[100], mode[8], uid[8],
        # gid[8], size[12] and mtime[12]; hit its second byte
        flip_bits (fname, 100 + 8 + 8 + 8 + 12 + 12 + 1)
def corrupt_ctsize (_, fname, compress, encrypt):
    """
    Blow up the size of an object so as to cause its apparent payload to leak
    into the objects following it.

    Only defined for encrypted volumes because the ciphertext size is a
    field of the encryption header.

    :param _:        ignored; receives the test instance
    :param fname:    path of the backup volume to damage
    :param compress: whether the volume is gzip compressed (only reported
                     in the error message)
    :param encrypt:  whether the volume is encrypted
    :raises UndefinedTest: if the volume is not encrypted
    """
    if encrypt is True:
        # damage lowest bit of second least significant byte of size field;
        # this effectively sets the ciphertext size to 422, causing it to
        # extend over the next object into the third one.
        return flip_bits (fname, crypto.HDR_OFF_CTSIZE + 1, b=0x01)
    raise UndefinedTest ("corrupt_ctsize %s %s %s" % (fname, compress, encrypt))
def corrupt_entire_header (_, fname, compress, encrypt):
    """
    Flip all bits in the first object header.

    :param _:        ignored; receives the test instance
    :param fname:    path of the backup volume to damage
    :param compress: whether the volume is gzip compressed
    :param encrypt:  whether the volume is encrypted; takes precedence over
                     *compress*
    """
    if encrypt is True:
        flip_bits (fname, 0, 0xff, crypto.PDTCRYPT_HDR_SIZE)
    elif compress is True: # invalidate magic
        flip_bits (fname, 0, 0xff, gz_header_size (fname))
    else:
        # plain tar: the header occupies the first block of the file
        flip_bits (fname, 0, 0xff, tarfile.BLOCKSIZE)
def corrupt_payload_start (_, fname, compress, encrypt):
    """
    Modify the byte following the object header structure of the format.

    :param _:        ignored; receives the test instance
    :param fname:    path of the backup volume to damage
    :param compress: whether the volume is gzip compressed
    :param encrypt:  whether the volume is encrypted; takes precedence over
                     *compress*
    """
    if encrypt is True:
        flip_bits (fname, crypto.PDTCRYPT_HDR_SIZE + 1)
    elif compress is True:
        flip_bits (fname, gz_header_size (fname) + 1)
    else:
        flip_bits (fname, tarfile.BLOCKSIZE + 1)
def corrupt_leading_garbage (_, fname, compress, encrypt):
    """
    Prepend junk to file.

    The result is assembled in an anonymous temporary file created in the
    directory of *fname* which is then linked into place under the original
    name. This relies on ``O_TMPFILE`` and ``/proc/self/fd``.

    :param _:        ignored; receives the test instance
    :param fname:    path of the backup volume to damage
    :param compress: unused
    :param encrypt:  unused
    """
    aname = os.path.abspath (fname)
    junk  = os.urandom (42)
    infd  = os.open (fname, os.O_RDONLY)

    try:
        size = os.lseek (infd, 0, os.SEEK_END)
        assert os.lseek (infd, 0, os.SEEK_SET) == 0
        outfd = os.open (os.path.dirname (aname), os.O_WRONLY | os.O_TMPFILE,
                         stat.S_IRUSR | stat.S_IWUSR)
        try:
            # write new file with garbage prepended
            os.write (outfd, junk) # junk first
            done = len (junk)
            while True:
                data = os.read (infd, TEST_BLOCKSIZE)
                if not data: # EOF: entire original contents copied
                    break
                os.write (outfd, data)
                done += len (data)

            assert done == len (junk) + size
            assert os.lseek (outfd, 0, os.SEEK_CUR) == done

            # free the old name ...
            os.unlink (fname)

            # ... and install the new file in its place; the symlink in
            # /proc must be followed to reach the anonymous file
            path = "/proc/self/fd/%d" % outfd
            os.link (path, aname, src_dir_fd=0, follow_symlinks=True)
        finally:
            os.close (outfd)
    finally:
        os.close (infd)
def corrupt_trailing_data (_, fname, compress, encrypt):
    """
    Append junk to file.

    :param _:        ignored; receives the test instance
    :param fname:    path of the backup volume to damage
    :param compress: unused
    :param encrypt:  unused
    """
    junk = os.urandom (42)
    fd = os.open (fname, os.O_WRONLY | os.O_APPEND)

    try:
        os.write (fd, junk)
    finally:
        os.close (fd)
def corrupt_volume (_, fname, compress, encrypt):
    """
    Zero out an entire volume.

    The file keeps its size; its contents are overwritten block by block.

    :param _:        ignored; receives the test instance
    :param fname:    path of the backup volume to damage
    :param compress: unused
    :param encrypt:  unused
    """
    fd = os.open (fname, os.O_WRONLY)

    try:
        size = os.lseek (fd, 0, os.SEEK_END)
        assert os.lseek (fd, 0, os.SEEK_SET) == 0
        zeros = bytes (TEST_BLOCKSIZE)
        while size > 0:
            # the final block may be shorter than TEST_BLOCKSIZE
            todo = min (size, TEST_BLOCKSIZE)
            os.write (fd, zeros [:todo])
            size -= todo
    finally:
        os.close (fd)
def corrupt_hole (_, fname, compress, encrypt):
    """
    Cut file in three pieces, reassemble without the middle one.

    The file is processed in blocks of ``TEST_BLOCKSIZE`` bytes; a block is
    dropped if its start offset lies within the middle third of the file.
    The result is assembled in an anonymous temporary file next to *fname*
    and then linked into place under the original name. This relies on
    ``O_TMPFILE`` and ``/proc/self/fd``.

    :param _:        ignored; receives the test instance
    :param fname:    path of the backup volume to damage; must be larger
                     than three blocks
    :param compress: unused
    :param encrypt:  unused
    """
    aname = os.path.abspath (fname)
    infd  = os.open (fname, os.O_RDONLY)

    try:
        size = os.lseek (infd, 0, os.SEEK_END)
        assert os.lseek (infd, 0, os.SEEK_SET) == 0
        assert size > 3 * TEST_BLOCKSIZE
        hole = (size / 3, size * 2 / 3)
        outfd = os.open (os.path.dirname (aname), os.O_WRONLY | os.O_TMPFILE,
                         stat.S_IRUSR | stat.S_IWUSR)
        try:
            done = 0
            while done < size:
                data = os.read (infd, TEST_BLOCKSIZE)
                if not data: # unexpected EOF; do not spin
                    break
                if done < hole [0] or hole [1] < done:
                    # only copy from outside hole
                    os.write (outfd, data)
                done += len (data)

            # free the old name, then install the new file in its place;
            # the symlink in /proc must be followed to reach the anonymous
            # file
            os.unlink (fname)
            path = "/proc/self/fd/%d" % outfd
            os.link (path, aname, src_dir_fd=0, follow_symlinks=True)
        finally:
            os.close (outfd)
    finally:
        os.close (infd)
235 def immaculate (_, _fname, _compress, _encrypt):
241 ###############################################################################
243 ###############################################################################
245 class DefectiveTest (BaseTest):
247 Disaster recovery: restore corrupt backups.
252 FAILURES = 0 # files that could not be restored
253 MISMATCHES = 0 # files that were restored but corrupted
254 CORRUPT = corrupt_payload_start
256 MISSING = None # normally the number of failures
261 Create base test data
263 self.pwd = os.getcwd()
264 self.dst_path = "source_dir"
265 self.src_path = "%s2" % self.dst_path
268 os.system('rm -rf target_dir source_dir* backup_dir* huge')
269 os.makedirs (self.src_path)
273 self.hash [f] = self.create_file ("%s/%s"
274 % (self.src_path, f), 5 + i)
279 Remove temporary files created by unit tests and reset globals.
282 os.system("rm -rf source_dir source_dir2 backup_dir*")
286 def default_volume_name (backup_file, _x, _y, n, *a, **kwa):
287 return backup_file % n
289 def gen_file_names (self, comp, pw):
290 bak_path = "backup_dir"
291 backup_file = "the_full_backup_%0.2d.tar"
292 backup_full = ("%s/%s" % (bak_path, backup_file)) % 0
293 index_file = "the_full_index"
295 if self.COMPRESSION is not None:
300 if self.PASSWORD is not None:
301 backup_file = "%s.%s" % (backup_file, deltatar.PDTCRYPT_EXTENSION)
302 backup_full = "%s.%s" % (backup_full, deltatar.PDTCRYPT_EXTENSION)
303 index_file = "%s.%s" % (index_file , deltatar.PDTCRYPT_EXTENSION)
305 return bak_path, backup_file, backup_full, index_file
308 def gen_multivol (self, nvol):
309 # add n files for one nth the volume size each, corrected
310 # for metadata and tar block overhead
311 fsiz = int ( ( TEST_VOLSIZ
312 / (TEST_FILESPERVOL * VOLUME_OVERHEAD))
314 fcnt = (self.VOLUMES - 1) * TEST_FILESPERVOL
315 for i in range (fcnt):
316 nvol, invol = divmod(i, TEST_FILESPERVOL)
317 f = "dummy_vol_%d_n_%0.2d" % (nvol, invol)
318 self.hash [f] = self.create_file ("%s/%s"
319 % (self.src_path, f),
324 class RecoverTest (DefectiveTest):
326 Recover: restore corrupt backups from index file information.
329 def test_recover_corrupt (self):
331 Perform various damaging actions that cause unreadable objects.
333 Expects the extraction to fail in normal mode. With disaster recovery,
334 extraction must succeed, and exactly one file must be missing.
336 mode = self.COMPRESSION or "#"
337 bak_path, backup_file, backup_full, index_file = \
338 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
341 self.gen_multivol (self.VOLUMES)
343 vname = partial (self.default_volume_name, backup_file)
344 dtar = deltatar.DeltaTar (mode=mode,
346 password=self.PASSWORD,
347 index_name_func=lambda _: index_file,
348 volume_name_func=vname)
350 dtar.create_full_backup \
351 (source_path=self.src_path, backup_path=bak_path,
354 if self.PASSWORD is not None:
355 # ensure all files are at least superficially in PDT format
356 for f in os.listdir (bak_path):
357 assert is_pdt_encrypted ("%s/%s" % (bak_path, f))
359 # first restore must succeed
360 dtar.restore_backup(target_path=self.dst_path,
361 backup_indexes_paths=[
362 "%s/%s" % (bak_path, index_file)
364 for key, value in self.hash.items ():
365 f = "%s/%s" % (self.dst_path, key)
366 assert os.path.exists (f)
367 assert value == self.md5sum (f)
368 shutil.rmtree (self.dst_path)
369 shutil.rmtree (self.src_path)
371 self.CORRUPT (backup_full,
372 self.COMPRESSION is not None,
373 self.PASSWORD is not None)
375 # normal restore must fail
377 dtar.restore_backup(target_path=self.dst_path,
378 backup_tar_path=backup_full)
379 except tarfile.CompressionError:
380 if self.PASSWORD is not None or self.COMPRESSION is not None:
384 except tarfile.ReadError:
385 # can happen with all three modes
387 except tarfile.DecryptionError:
388 if self.PASSWORD is not None:
393 os.chdir (self.pwd) # not restored due to the error above
394 # but recover will succeed
395 failed = dtar.recover_backup(target_path=self.dst_path,
396 backup_indexes_paths=[
397 "%s/%s" % (bak_path, index_file)
400 assert len (failed) == self.FAILURES
402 # with one file missing
405 for key, value in self.hash.items ():
406 kkey = "%s/%s" % (self.dst_path, key)
407 if os.path.exists (kkey):
408 if value != self.md5sum (kkey):
409 mismatch.append (key)
413 # usually, an object whose extraction fails will not be found on
414 # disk afterwards so the number of failures equals that of missing
415 # files. however, some modes will create partial files for objects
416 # spanning multiple volumes that contain the parts whose checksums
418 assert len (missing) == (self.MISSING if self.MISSING is not None
420 assert len (mismatch) == self.MISMATCHES
422 shutil.rmtree (self.dst_path)
425 class RescueTest (DefectiveTest):
427 Rescue: restore corrupt backups from backup set that is damaged to a degree
428 that the index file is worthless.
431 def test_rescue_corrupt (self):
433 Perform various damaging actions that cause unreadable objects, then
434 attempt to extract objects regardless.
436 mode = self.COMPRESSION or "#"
437 bak_path, backup_file, backup_full, index_file = \
438 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
441 self.gen_multivol (self.VOLUMES)
443 vname = partial (self.default_volume_name, backup_file)
444 dtar = deltatar.DeltaTar (mode=mode,
446 password=self.PASSWORD,
447 index_name_func=lambda _: index_file,
448 volume_name_func=vname)
450 dtar.create_full_backup \
451 (source_path=self.src_path, backup_path=bak_path,
454 if self.PASSWORD is not None:
455 # ensure all files are at least superficially in PDT format
456 for f in os.listdir (bak_path):
457 assert is_pdt_encrypted ("%s/%s" % (bak_path, f))
459 # first restore must succeed
460 dtar.restore_backup(target_path=self.dst_path,
461 backup_indexes_paths=[
462 "%s/%s" % (bak_path, index_file)
464 for key, value in self.hash.items ():
465 f = "%s/%s" % (self.dst_path, key)
466 assert os.path.exists (f)
467 assert value == self.md5sum (f)
468 shutil.rmtree (self.dst_path)
469 shutil.rmtree (self.src_path)
471 self.CORRUPT (backup_full,
472 self.COMPRESSION is not None,
473 self.PASSWORD is not None)
475 # normal restore must fail
477 dtar.restore_backup(target_path=self.dst_path,
478 backup_tar_path=backup_full)
479 except tarfile.CompressionError:
480 if self.PASSWORD is not None or self.COMPRESSION is not None:
484 except tarfile.ReadError:
485 # can happen with all three modes
487 except tarfile.DecryptionError:
488 if self.PASSWORD is not None:
493 os.chdir (self.pwd) # not restored due to the error above
494 # but recover will succeed
495 failed = dtar.rescue_backup(target_path=self.dst_path,
496 backup_tar_path=backup_full)
497 # with one file missing
500 for key, value in self.hash.items ():
501 kkey = "%s/%s" % (self.dst_path, key)
502 if os.path.exists (kkey):
503 if value != self.md5sum (kkey):
504 mismatch.append (key)
508 assert len (failed) == self.FAILURES
509 assert len (missing) == (self.MISSING if self.MISSING is not None
511 assert len (mismatch) == self.MISMATCHES
513 shutil.rmtree (self.dst_path)
516 class GenIndexTest (DefectiveTest):
518 Deducing an index for a backup with tarfile.
521 def test_gen_index (self):
523 Create backup, leave it unharmed, then generate an index.
525 mode = self.COMPRESSION or "#"
526 bak_path, backup_file, backup_full, index_file = \
527 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
530 self.gen_multivol (self.VOLUMES)
532 vname = partial (self.default_volume_name, backup_file)
533 dtar = deltatar.DeltaTar (mode=mode,
535 password=self.PASSWORD,
536 index_name_func=lambda _: index_file,
537 volume_name_func=vname)
539 dtar.create_full_backup \
540 (source_path=self.src_path, backup_path=bak_path,
543 def gen_volume_name (nvol):
544 return os.path.join (bak_path, vname (backup_full, True, nvol))
546 psidx = tarfile.gen_rescue_index (gen_volume_name,
548 password=self.PASSWORD)
550 # correct for objects spanning volumes: these are treated as separate
552 assert len (psidx) - self.VOLUMES + 1 == len (self.hash)
555 ###############################################################################
557 ###############################################################################
559 class RecoverCorruptPayloadTestBase (RecoverTest):
562 FAILURES = 0 # tarfile will restore but corrupted, as
563 MISMATCHES = 1 # revealed by the hash
565 class RecoverCorruptPayloadSingleTest (RecoverCorruptPayloadTestBase):
568 class RecoverCorruptPayloadMultiTest (RecoverCorruptPayloadTestBase):
572 class RecoverCorruptPayloadGZTestBase (RecoverTest):
578 class RecoverCorruptPayloadGZSingleTest (RecoverCorruptPayloadGZTestBase):
581 class RecoverCorruptPayloadGZMultiTest (RecoverCorruptPayloadGZTestBase):
585 class RecoverCorruptPayloadGZAESTestBase (RecoverTest):
587 PASSWORD = TEST_PASSWORD
591 class RecoverCorruptPayloadGZAESSingleTest (RecoverCorruptPayloadGZAESTestBase):
594 class RecoverCorruptPayloadGZAESMultiTest (RecoverCorruptPayloadGZAESTestBase):
598 class RecoverCorruptHeaderTestBase (RecoverTest):
602 CORRUPT = corrupt_header
605 class RecoverCorruptHeaderSingleTest (RecoverCorruptHeaderTestBase):
608 class RecoverCorruptHeaderMultiTest (RecoverCorruptHeaderTestBase):
612 class RecoverCorruptHeaderGZTestBase (RecoverTest):
616 CORRUPT = corrupt_header
619 class RecoverCorruptHeaderGZSingleTest (RecoverCorruptHeaderGZTestBase):
622 class RecoverCorruptHeaderGZMultiTest (RecoverCorruptHeaderGZTestBase):
626 class RecoverCorruptHeaderGZAESTestBase (RecoverTest):
628 PASSWORD = TEST_PASSWORD
630 CORRUPT = corrupt_header
633 class RecoverCorruptHeaderGZAESSingleTest (RecoverCorruptHeaderGZAESTestBase):
636 class RecoverCorruptHeaderGZAESMultiTest (RecoverCorruptHeaderGZAESTestBase):
640 class RecoverCorruptEntireHeaderTestBase (RecoverTest):
644 CORRUPT = corrupt_entire_header
647 class RecoverCorruptEntireHeaderSingleTest (RecoverCorruptEntireHeaderTestBase):
650 class RecoverCorruptEntireHeaderMultiTest (RecoverCorruptEntireHeaderTestBase):
654 class RecoverCorruptEntireHeaderGZTestBase (RecoverTest):
658 CORRUPT = corrupt_entire_header
661 class RecoverCorruptEntireHeaderGZSingleTest (RecoverCorruptEntireHeaderGZTestBase):
664 class RecoverCorruptEntireHeaderGZMultiTest (RecoverCorruptEntireHeaderGZTestBase):
668 class RecoverCorruptEntireHeaderGZAESTestBase (RecoverTest):
670 PASSWORD = TEST_PASSWORD
672 CORRUPT = corrupt_entire_header
675 class RecoverCorruptEntireHeaderGZAESSingleTest (RecoverCorruptEntireHeaderGZAESTestBase):
678 class RecoverCorruptEntireHeaderGZAESMultiTest (RecoverCorruptEntireHeaderGZAESTestBase):
682 class RecoverCorruptTrailingDataTestBase (RecoverTest):
683 # plain Tar is indifferent against trailing data and the results
688 CORRUPT = corrupt_trailing_data
691 class RecoverCorruptTrailingDataSingleTest (RecoverCorruptTrailingDataTestBase):
694 class RecoverCorruptTrailingDataMultiTest (RecoverCorruptTrailingDataTestBase):
695 # the last object in first archive has extra bytes somewhere in the
696 # middle because tar itself performs no data checksumming.
701 class RecoverCorruptTrailingDataGZTestBase (RecoverTest):
702 # reading past the final object will cause decompression failure;
703 # all objects except for the last survive unharmed though
707 CORRUPT = corrupt_trailing_data
710 class RecoverCorruptTrailingDataGZSingleTest (RecoverCorruptTrailingDataGZTestBase):
713 class RecoverCorruptTrailingDataGZMultiTest (RecoverCorruptTrailingDataGZTestBase):
715 # the last file of the first volume will only contain the data of the
716 # second part which is contained in the second volume. this happens
717 # because the CRC32 is wrong for the first part so it gets discarded, then
718 # the object is recreated from the first header of the second volume,
719 # containing only the remainder of the data.
724 class RecoverCorruptTrailingDataGZAESTestBase (RecoverTest):
726 PASSWORD = TEST_PASSWORD
728 CORRUPT = corrupt_trailing_data
731 class RecoverCorruptTrailingDataGZAESSingleTest (RecoverCorruptTrailingDataGZAESTestBase):
734 class RecoverCorruptTrailingDataGZAESMultiTest (RecoverCorruptTrailingDataGZAESTestBase):
738 class RecoverCorruptVolumeBaseTest (RecoverTest):
742 CORRUPT = corrupt_volume
745 class RecoverCorruptVolumeTest (RecoverCorruptVolumeBaseTest):
748 class RecoverCorruptVolumeGZTest (RecoverCorruptVolumeBaseTest):
751 class RecoverCorruptVolumeGZAESTest (RecoverCorruptVolumeBaseTest):
753 PASSWORD = TEST_PASSWORD
756 class RecoverCorruptHoleBaseTest (RecoverTest):
758 Cut bytes from the middle of a volume.
760 Index-based recovery works only up to the hole.
765 CORRUPT = corrupt_hole
766 VOLUMES = 2 # request two vols to swell up the first one
769 class RecoverCorruptHoleTest (RecoverCorruptHoleBaseTest):
772 class RecoverCorruptHoleGZTest (RecoverCorruptHoleBaseTest):
776 class RecoverCorruptHoleGZAESTest (RecoverCorruptHoleBaseTest):
778 PASSWORD = TEST_PASSWORD
781 ###############################################################################
783 ###############################################################################
785 class RescueCorruptHoleBaseTest (RescueTest):
787 Cut bytes from the middle of a volume.
792 CORRUPT = corrupt_hole
793 VOLUMES = 2 # request two vols to swell up the first one
794 MISMATCHES = 2 # intersected by hole
795 MISSING = 1 # excised by hole
797 class RescueCorruptHoleTest (RescueCorruptHoleBaseTest):
800 class RescueCorruptHoleGZTest (RescueCorruptHoleBaseTest):
802 # the decompressor explodes in our face processing the first dummy, nothing
803 # we can do to recover
806 class RescueCorruptHoleGZAESTest (RescueCorruptHoleBaseTest):
808 PASSWORD = TEST_PASSWORD
809 # again, ignoring the crypto errors yields a bad zlib stream causing the
810 # decompressor to abort where the hole begins; the file is extracted up
811 # to this point though
815 class RescueCorruptHeaderCTSizeGZAESTest (RescueTest):
817 PASSWORD = TEST_PASSWORD
819 CORRUPT = corrupt_ctsize
823 class RescueCorruptLeadingGarbageTestBase (RescueTest):
824 # plain Tar is indifferent against trailing data and the results
829 CORRUPT = corrupt_leading_garbage
832 class RescueCorruptLeadingGarbageSingleTest (RescueCorruptLeadingGarbageTestBase):
835 class RescueCorruptLeadingGarbageMultiTest (RescueCorruptLeadingGarbageTestBase):
836 # the last object in first archive has extra bytes somewhere in the
837 # middle because tar itself performs no data checksumming.
842 ###############################################################################
844 ###############################################################################
846 class GenIndexIntactBaseTest (GenIndexTest):
856 class GenIndexIntactSingleTest (GenIndexIntactBaseTest):
859 class GenIndexIntactSingleGZTest (GenIndexIntactBaseTest):
863 class GenIndexIntactSingleGZAESTest (GenIndexIntactBaseTest):
865 PASSWORD = TEST_PASSWORD
868 class GenIndexIntactMultiTest (GenIndexIntactBaseTest):
872 class GenIndexIntactMultiGZTest (GenIndexIntactBaseTest):
877 class GenIndexIntactMultiGZAESTest (GenIndexIntactBaseTest):
880 PASSWORD = TEST_PASSWORD