6 from functools import partial
8 import deltatar.deltatar as deltatar
9 import deltatar.crypto as crypto
10 import deltatar.tarfile as tarfile
12 from . import BaseTest
14 TEST_PASSWORD = "test1234"
17 VOLUME_OVERHEAD = 1.4 # account for tar overhead when fitting files into
18 # volumes; this is black magic
21 ###############################################################################
23 ###############################################################################
def flip_bits (fname, off, b=0x01, n=1):
    """
    Open file *fname* at offset *off*, replacing the next *n* bytes with
    their values xor'ed with *b*.

    If fewer than *n* bytes are available at *off*, only the bytes that
    could be read are modified; the file is never extended.

    :param fname: path of the file to damage in place
    :param off:   absolute offset of the first byte to modify
    :param b:     mask that is xor'ed onto each affected byte
    :param n:     number of consecutive bytes to modify
    """
    fd = os.open (fname, os.O_RDWR)

    try:
        pos = os.lseek (fd, off, os.SEEK_SET)
        assert pos == off
        chunk = os.read (fd, n)
        chunk = bytes (v ^ b for v in chunk)
        # the read advanced the file offset; rewind so the modified bytes
        # land on top of the ones they were derived from
        pos = os.lseek (fd, off, os.SEEK_SET)
        assert pos == off
        os.write (fd, chunk)
    finally:
        # release the descriptor even if seeking or writing failed
        os.close (fd)
def gz_header_size (fname, off=0):
    """
    Determine the length of the gzip header starting at *off* in file fname.

    The header is variable length because it may contain the filename as NUL
    terminated bytes. The scan therefore starts right behind the fixed-size
    part of the header (``tarfile.GZ_HEADER_SIZE`` bytes) and advances until
    the terminating NUL byte is found.

    :param fname: path of the gzip-compressed file
    :param off:   offset in *fname* at which the gzip header begins
    :returns:     distance in bytes from *off* to the NUL byte that
                  terminates the embedded file name
    :raises ValueError: if the file ends before a NUL byte was found
    """
    start = off
    # skip the fixed-size part; the previous code assigned the constant to
    # *off* directly, silently discarding the caller's offset
    off = start + tarfile.GZ_HEADER_SIZE
    fd = os.open (fname, os.O_RDONLY)

    try:
        pos = os.lseek (fd, off, os.SEEK_SET)
        assert pos == off
        while True:
            byte = os.read (fd, 1)
            if len (byte) == 0:
                raise ValueError ("%s: EOF inside gzip header at offset %d"
                                  % (fname, off))
            if byte [0] == 0:
                break
            off += 1
    finally:
        os.close (fd)

    return off - start
68 def is_pdt_encrypted (fname):
70 Returns true if the file contains at least one PDT header plus enough
74 with open (fname, "rb") as st:
75 hdr = crypto.hdr_read_stream (st)
77 assert (len (st.read (siz)) == siz)
78 except Exception as exn:
83 ###############################################################################
84 ## corruption simulators ##
85 ###############################################################################
def corrupt_header (_, fname, compress, encrypt):
    """
    Modify a significant byte in the object header of the format.

    Which header is damaged depends on the outermost layer of the backup:
    encryption takes precedence over compression, plain tar comes last.

    :param _:        unused; the function is stored as the ``CORRUPT`` class
                     attribute of the tests and invoked through the instance,
                     which then arrives in this slot
    :param fname:    path of the volume to damage
    :param compress: whether the volume is compressed
    :param encrypt:  whether the volume is encrypted
    """
    if encrypt is True: # damage GCM tag
        flip_bits (fname, crypto.HDR_OFF_TAG + 1)
    elif compress is True: # invalidate magic
        # NOTE(review): any bit flipped inside the two magic bytes at the
        # very start of a gzip member does the job; offset 1 is assumed
        flip_bits (fname, 1)
    else: # Fudge checksum. From tar(5), the header begins with the fields
          #
          #     name [100], mode [8], uid [8], gid [8], size [12],
          #     mtime [12], chksum [8], ...
          #
          # so the checksum starts at offset 148; damage its second byte.
        flip_bits (fname, 100 + 8 + 8 + 8 + 12 + 12 + 1)
def corrupt_entire_header (_, fname, compress, encrypt):
    """
    Flip all bits in the first object header.

    The number of bytes inverted is the size of the outermost header:
    ``crypto.PDTCRYPT_HDR_SIZE`` for encrypted volumes, the result of
    ``gz_header_size ()`` for compressed ones and one tar block otherwise.

    :param _:        unused; receives the test instance when invoked as
                     ``self.CORRUPT (...)``
    :param fname:    path of the volume to damage
    :param compress: whether the volume is compressed
    :param encrypt:  whether the volume is encrypted
    """
    if encrypt is True:
        flip_bits (fname, 0, 0xff, crypto.PDTCRYPT_HDR_SIZE)
    elif compress is True: # invalidate magic
        flip_bits (fname, 0, 0xff, gz_header_size (fname))
    else:
        flip_bits (fname, 0, 0xff, tarfile.BLOCKSIZE)
def corrupt_payload_start (_, fname, compress, encrypt):
    """
    Modify a byte right behind the object header structure of the format.

    The lowest bit of the byte at offset *header size + 1* is flipped, where
    the header size is that of the outermost layer (encryption, compression
    or plain tar, in this order of precedence).

    :param _:        unused; receives the test instance when invoked as
                     ``self.CORRUPT (...)``
    :param fname:    path of the volume to damage
    :param compress: whether the volume is compressed
    :param encrypt:  whether the volume is encrypted
    """
    if encrypt is True:
        flip_bits (fname, crypto.PDTCRYPT_HDR_SIZE + 1)
    elif compress is True:
        flip_bits (fname, gz_header_size (fname) + 1)
    else:
        flip_bits (fname, tarfile.BLOCKSIZE + 1)
def corrupt_trailing_data (_, fname, compress, encrypt):
    """
    Append 42 bytes of random junk to the end of the file.

    The existing content is left untouched; *compress* and *encrypt* are
    accepted for signature compatibility with the other corruption
    simulators but have no influence on the result.

    :param _:        unused; receives the test instance when invoked as
                     ``self.CORRUPT (...)``
    :param fname:    path of the volume to damage
    :param compress: ignored
    :param encrypt:  ignored
    """
    junk = os.urandom (42)
    fd = os.open (fname, os.O_WRONLY | os.O_APPEND)
    try:
        os.write (fd, junk)
    finally:
        os.close (fd)
def corrupt_volume (_, fname, compress, encrypt):
    """
    Zero out an entire volume.

    The file keeps its size; its content is overwritten with NUL bytes in
    chunks of at most ``TEST_BLOCKSIZE`` bytes.

    :param _:        unused; receives the test instance when invoked as
                     ``self.CORRUPT (...)``
    :param fname:    path of the volume to wipe
    :param compress: ignored
    :param encrypt:  ignored
    """
    fd = os.open (fname, os.O_WRONLY)
    try:
        size = os.lseek (fd, 0, os.SEEK_END)
        assert os.lseek (fd, 0, os.SEEK_SET) == 0
        zeros = b'\x00' * TEST_BLOCKSIZE
        while size > 0:
            todo = min (size, TEST_BLOCKSIZE)
            # account for what was actually written so that a short write
            # does not leave part of the volume intact
            size -= os.write (fd, zeros [:todo])
    finally:
        os.close (fd)
def corrupt_hole (_, fname, compress, encrypt):
    """
    Cut file in three pieces, reassemble without the middle one.

    The file is copied chunk-wise (``TEST_BLOCKSIZE`` bytes at a time) into
    an anonymous temporary file in the same directory; chunks starting
    inside the middle third are left out. The temporary file then takes the
    place of the original.

    Relies on ``O_TMPFILE`` and ``/proc/self/fd``.

    :param _:        unused; receives the test instance when invoked as
                     ``self.CORRUPT (...)``
    :param fname:    path of the volume to damage; must be larger than
                     three times ``TEST_BLOCKSIZE``
    :param compress: ignored
    :param encrypt:  ignored
    """
    aname = os.path.abspath (fname)
    infd = os.open (fname, os.O_RDONLY)
    try:
        size = os.lseek (infd, 0, os.SEEK_END)
        assert os.lseek (infd, 0, os.SEEK_SET) == 0
        assert size > 3 * TEST_BLOCKSIZE
        hole = (size / 3, size * 2 / 3)
        outfd = os.open (os.path.dirname (aname),
                         os.O_WRONLY | os.O_TMPFILE,
                         stat.S_IRUSR | stat.S_IWUSR)
        try:
            done = 0
            while done < size:
                data = os.read (infd, TEST_BLOCKSIZE)
                if len (data) == 0:
                    break # premature EOF; do not spin forever
                if done < hole [0] or hole [1] < done:
                    # only copy from outside hole
                    os.write (outfd, data)
                done += len (data)

            # make room for the replacement: link () refuses to overwrite
            os.unlink (aname)

            path = "/proc/self/fd/%d" % outfd
            # NOTE(review): src_dir_fd is meaningless for an absolute path;
            # presumably it is only passed to make os.link () go through
            # linkat(2) so that the /proc symlink gets dereferenced --
            # confirm before removing it
            os.link (path, aname, src_dir_fd=0, follow_symlinks=True)
        finally:
            os.close (outfd)
    finally:
        os.close (infd)
187 def immaculate (_, _fname, _compress, _encrypt):
193 ###############################################################################
195 ###############################################################################
197 class DefectiveTest (BaseTest):
199 Disaster recovery: restore corrupt backups.
204 FAILURES = 0 # files that could not be restored
205 MISMATCHES = 0 # files that were restored but corrupted
206 CORRUPT = corrupt_payload_start
208 MISSING = None # normally the number of failures
213 Create base test data
215 self.pwd = os.getcwd()
216 self.dst_path = "source_dir"
217 self.src_path = "%s2" % self.dst_path
220 os.system('rm -rf target_dir source_dir* backup_dir* huge')
221 os.makedirs (self.src_path)
225 self.hash [f] = self.create_file ("%s/%s"
226 % (self.src_path, f), 5 + i)
231 Remove temporary files created by unit tests and reset globals.
234 os.system("rm -rf source_dir source_dir2 backup_dir*")
def default_volume_name (backup_file, _x, _y, n, *a, **kwa):
    """
    Build the name of volume number *n* by substituting it into the
    %-style template *backup_file*; all other arguments are ignored.
    """
    volume_name = backup_file % n
    return volume_name
241 def gen_file_names (self, comp, pw):
242 bak_path = "backup_dir"
243 backup_file = "the_full_backup_%0.2d.tar"
244 backup_full = ("%s/%s" % (bak_path, backup_file)) % 0
245 index_file = "the_full_index"
247 if self.COMPRESSION is not None:
252 if self.PASSWORD is not None:
253 backup_file = "%s.%s" % (backup_file, deltatar.PDTCRYPT_EXTENSION)
254 backup_full = "%s.%s" % (backup_full, deltatar.PDTCRYPT_EXTENSION)
255 index_file = "%s.%s" % (index_file , deltatar.PDTCRYPT_EXTENSION)
257 return bak_path, backup_file, backup_full, index_file
260 class RecoverTest (DefectiveTest):
262 Recover: restore corrupt backups from index file information.
265 def test_recover_corrupt (self):
267 Perform various damaging actions that cause unreadable objects.
269 Expects the extraction to fail in normal mode. With disaster recovery,
270 extraction must succeed, and exactly one file must be missing.
272 mode = self.COMPRESSION or "#"
273 bak_path, backup_file, backup_full, index_file = \
274 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
277 # add n files for one nth the volume size each, corrected
278 # for metadata and tar block overhead
279 fsiz = int ( ( TEST_VOLSIZ
280 / (TEST_FILESPERVOL * VOLUME_OVERHEAD))
282 fcnt = (self.VOLUMES - 1) * TEST_FILESPERVOL
283 for i in range (fcnt):
284 nvol, invol = divmod(i, TEST_FILESPERVOL)
285 f = "dummy_vol_%d_n_%0.2d" % (nvol, invol)
286 self.hash [f] = self.create_file ("%s/%s"
287 % (self.src_path, f),
291 vname = partial (self.default_volume_name, backup_file)
292 dtar = deltatar.DeltaTar (mode=mode,
294 password=self.PASSWORD,
295 index_name_func=lambda _: index_file,
296 volume_name_func=vname)
298 dtar.create_full_backup \
299 (source_path=self.src_path, backup_path=bak_path,
302 if self.PASSWORD is not None:
303 # ensure all files are at least superficially in PDT format
304 for f in os.listdir (bak_path):
305 assert is_pdt_encrypted ("%s/%s" % (bak_path, f))
307 # first restore must succeed
308 dtar.restore_backup(target_path=self.dst_path,
309 backup_indexes_paths=[
310 "%s/%s" % (bak_path, index_file)
312 for key, value in self.hash.items ():
313 f = "%s/%s" % (self.dst_path, key)
314 assert os.path.exists (f)
315 assert value == self.md5sum (f)
316 shutil.rmtree (self.dst_path)
317 shutil.rmtree (self.src_path)
319 self.CORRUPT (backup_full,
320 self.COMPRESSION is not None,
321 self.PASSWORD is not None)
323 # normal restore must fail
325 dtar.restore_backup(target_path=self.dst_path,
326 backup_tar_path=backup_full)
327 except tarfile.CompressionError:
328 if self.PASSWORD is not None or self.COMPRESSION is not None:
332 except tarfile.ReadError:
333 # can happen with all three modes
335 except tarfile.DecryptionError:
336 if self.PASSWORD is not None:
341 os.chdir (self.pwd) # not restored due to the error above
342 # but recover will succeed
343 failed = dtar.recover_backup(target_path=self.dst_path,
344 backup_indexes_paths=[
345 "%s/%s" % (bak_path, index_file)
348 assert len (failed) == self.FAILURES
350 # with one file missing
353 for key, value in self.hash.items ():
354 kkey = "%s/%s" % (self.dst_path, key)
355 if os.path.exists (kkey):
356 if value != self.md5sum (kkey):
357 mismatch.append (key)
361 # usually, an object whose extraction fails will not be found on
362 # disk afterwards so the number of failures equals that of missing
363 # files. however, some modes will create partial files for objects
364 # spanning multiple volumes that contain the parts whose checksums
366 assert len (missing) == (self.MISSING if self.MISSING is not None
368 assert len (mismatch) == self.MISMATCHES
370 shutil.rmtree (self.dst_path)
373 class RescueTest (DefectiveTest):
375 Rescue: restore corrupt backups from backup set that is damaged to a degree
376 that the index file is worthless.
379 def test_rescue_corrupt (self):
381 Perform various damaging actions that cause unreadable objects, then
382 attempt to extract objects regardless.
384 mode = self.COMPRESSION or "#"
385 bak_path, backup_file, backup_full, index_file = \
386 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
389 # add n files for one nth the volume size each, corrected
390 # for metadata and tar block overhead
391 fsiz = int ( ( TEST_VOLSIZ
392 / (TEST_FILESPERVOL * VOLUME_OVERHEAD))
394 fcnt = (self.VOLUMES - 1) * TEST_FILESPERVOL
395 for i in range (fcnt):
396 nvol, invol = divmod(i, TEST_FILESPERVOL)
397 f = "dummy_vol_%d_n_%0.2d" % (nvol, invol)
398 self.hash [f] = self.create_file ("%s/%s"
399 % (self.src_path, f),
403 vname = partial (self.default_volume_name, backup_file)
404 dtar = deltatar.DeltaTar (mode=mode,
406 password=self.PASSWORD,
407 index_name_func=lambda _: index_file,
408 volume_name_func=vname)
410 dtar.create_full_backup \
411 (source_path=self.src_path, backup_path=bak_path,
414 if self.PASSWORD is not None:
415 # ensure all files are at least superficially in PDT format
416 for f in os.listdir (bak_path):
417 assert is_pdt_encrypted ("%s/%s" % (bak_path, f))
419 # first restore must succeed
420 dtar.restore_backup(target_path=self.dst_path,
421 backup_indexes_paths=[
422 "%s/%s" % (bak_path, index_file)
424 for key, value in self.hash.items ():
425 f = "%s/%s" % (self.dst_path, key)
426 assert os.path.exists (f)
427 assert value == self.md5sum (f)
428 shutil.rmtree (self.dst_path)
429 shutil.rmtree (self.src_path)
431 self.CORRUPT (backup_full,
432 self.COMPRESSION is not None,
433 self.PASSWORD is not None)
435 # normal restore must fail
437 dtar.restore_backup(target_path=self.dst_path,
438 backup_tar_path=backup_full)
439 except tarfile.CompressionError:
440 if self.PASSWORD is not None or self.COMPRESSION is not None:
444 except tarfile.ReadError:
445 # can happen with all three modes
447 except tarfile.DecryptionError:
448 if self.PASSWORD is not None:
453 os.chdir (self.pwd) # not restored due to the error above
454 # but rescue will succeed
455 failed = dtar.rescue_backup(target_path=self.dst_path,
456 backup_tar_path=backup_full)
457 # with one file missing
460 for key, value in self.hash.items ():
461 kkey = "%s/%s" % (self.dst_path, key)
462 if os.path.exists (kkey):
463 if value != self.md5sum (kkey):
464 mismatch.append (key)
468 assert len (failed) == self.FAILURES
469 assert len (missing) == (self.MISSING if self.MISSING is not None
471 assert len (mismatch) == self.MISMATCHES
473 shutil.rmtree (self.dst_path)
476 class GenIndexTest (DefectiveTest):
478 Deducing an index for a backup with tarfile.
481 def test_gen_index (self):
483 Create backup, leave it unharmed, then generate an index.
485 mode = self.COMPRESSION or "#"
486 bak_path, backup_file, backup_full, index_file = \
487 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
489 vname = partial (self.default_volume_name, backup_file)
490 dtar = deltatar.DeltaTar (mode=mode,
492 password=self.PASSWORD,
493 index_name_func=lambda _: index_file,
494 volume_name_func=vname)
496 dtar.create_full_backup \
497 (source_path=self.src_path, backup_path=bak_path,
500 psidx = tarfile.gen_rescue_index (backup_full, mode, password=self.PASSWORD)
502 assert len (psidx) == len (self.hash)
505 ###############################################################################
507 ###############################################################################
509 class RecoverCorruptPayloadTestBase (RecoverTest):
512 FAILURES = 0 # tarfile will restore but corrupted, as
513 MISMATCHES = 1 # revealed by the hash
515 class RecoverCorruptPayloadSingleTest (RecoverCorruptPayloadTestBase):
518 class RecoverCorruptPayloadMultiTest (RecoverCorruptPayloadTestBase):
522 class RecoverCorruptPayloadGZTestBase (RecoverTest):
528 class RecoverCorruptPayloadGZSingleTest (RecoverCorruptPayloadGZTestBase):
531 class RecoverCorruptPayloadGZMultiTest (RecoverCorruptPayloadGZTestBase):
535 class RecoverCorruptPayloadGZAESTestBase (RecoverTest):
537 PASSWORD = TEST_PASSWORD
541 class RecoverCorruptPayloadGZAESSingleTest (RecoverCorruptPayloadGZAESTestBase):
544 class RecoverCorruptPayloadGZAESMultiTest (RecoverCorruptPayloadGZAESTestBase):
548 class RecoverCorruptHeaderTestBase (RecoverTest):
552 CORRUPT = corrupt_header
555 class RecoverCorruptHeaderSingleTest (RecoverCorruptHeaderTestBase):
558 class RecoverCorruptHeaderMultiTest (RecoverCorruptHeaderTestBase):
562 class RecoverCorruptHeaderGZTestBase (RecoverTest):
566 CORRUPT = corrupt_header
569 class RecoverCorruptHeaderGZSingleTest (RecoverCorruptHeaderGZTestBase):
572 class RecoverCorruptHeaderGZMultiTest (RecoverCorruptHeaderGZTestBase):
576 class RecoverCorruptHeaderGZAESTestBase (RecoverTest):
578 PASSWORD = TEST_PASSWORD
580 CORRUPT = corrupt_header
583 class RecoverCorruptHeaderGZAESSingleTest (RecoverCorruptHeaderGZAESTestBase):
586 class RecoverCorruptHeaderGZAESMultiTest (RecoverCorruptHeaderGZAESTestBase):
590 class RecoverCorruptEntireHeaderTestBase (RecoverTest):
594 CORRUPT = corrupt_entire_header
597 class RecoverCorruptEntireHeaderSingleTest (RecoverCorruptEntireHeaderTestBase):
600 class RecoverCorruptEntireHeaderMultiTest (RecoverCorruptEntireHeaderTestBase):
604 class RecoverCorruptEntireHeaderGZTestBase (RecoverTest):
608 CORRUPT = corrupt_entire_header
611 class RecoverCorruptEntireHeaderGZSingleTest (RecoverCorruptEntireHeaderGZTestBase):
614 class RecoverCorruptEntireHeaderGZMultiTest (RecoverCorruptEntireHeaderGZTestBase):
618 class RecoverCorruptEntireHeaderGZAESTestBase (RecoverTest):
620 PASSWORD = TEST_PASSWORD
622 CORRUPT = corrupt_entire_header
625 class RecoverCorruptEntireHeaderGZAESSingleTest (RecoverCorruptEntireHeaderGZAESTestBase):
628 class RecoverCorruptEntireHeaderGZAESMultiTest (RecoverCorruptEntireHeaderGZAESTestBase):
632 class RecoverCorruptTrailingDataTestBase (RecoverTest):
633 # plain Tar is indifferent against trailing data and the results
638 CORRUPT = corrupt_trailing_data
641 class RecoverCorruptTrailingDataSingleTest (RecoverCorruptTrailingDataTestBase):
644 class RecoverCorruptTrailingDataMultiTest (RecoverCorruptTrailingDataTestBase):
645 # the last object in first archive has extra bytes somewhere in the
646 # middle because tar itself performs no data checksumming.
651 class RecoverCorruptTrailingDataGZTestBase (RecoverTest):
652 # reading past the final object will cause decompression failure;
653 # all objects except for the last survive unharmed though
657 CORRUPT = corrupt_trailing_data
660 class RecoverCorruptTrailingDataGZSingleTest (RecoverCorruptTrailingDataGZTestBase):
663 class RecoverCorruptTrailingDataGZMultiTest (RecoverCorruptTrailingDataGZTestBase):
665 # the last file of the first volume will only contain the data of the
666 # second part which is contained in the second volume. this happens
667 # because the CRC32 is wrong for the first part so it gets discarded, then
668 # the object is recreated from the first header of the second volume,
669 # containing only the remainder of the data.
674 class RecoverCorruptTrailingDataGZAESTestBase (RecoverTest):
676 PASSWORD = TEST_PASSWORD
678 CORRUPT = corrupt_trailing_data
681 class RecoverCorruptTrailingDataGZAESSingleTest (RecoverCorruptTrailingDataGZAESTestBase):
684 class RecoverCorruptTrailingDataGZAESMultiTest (RecoverCorruptTrailingDataGZAESTestBase):
688 class RecoverCorruptVolumeBaseTest (RecoverTest):
692 CORRUPT = corrupt_volume
695 class RecoverCorruptVolumeTest (RecoverCorruptVolumeBaseTest):
698 class RecoverCorruptVolumeGZTest (RecoverCorruptVolumeBaseTest):
701 class RecoverCorruptVolumeGZAESTest (RecoverCorruptVolumeBaseTest):
703 PASSWORD = TEST_PASSWORD
706 class RecoverCorruptHoleBaseTest (RecoverTest):
708 Cut bytes from the middle of a volume.
710 Index-based recovery works only up to the hole.
715 CORRUPT = corrupt_hole
716 VOLUMES = 2 # request two vols to swell up the first one
719 class RecoverCorruptHoleTest (RecoverCorruptHoleBaseTest):
722 class RecoverCorruptHoleGZTest (RecoverCorruptHoleBaseTest):
726 class RecoverCorruptHoleGZAESTest (RecoverCorruptHoleBaseTest):
728 PASSWORD = TEST_PASSWORD
731 ###############################################################################
733 ###############################################################################
735 class RescueCorruptHoleBaseTest (RescueTest):
737 Cut bytes from the middle of a volume.
742 CORRUPT = corrupt_hole
743 VOLUMES = 2 # request two vols to swell up the first one
744 MISMATCHES = 2 # intersected by hole
745 MISSING = 1 # excised by hole
747 class RescueCorruptHoleTest (RescueCorruptHoleBaseTest):
750 class RescueCorruptHoleGZTest (RescueCorruptHoleBaseTest):
752 # the decompressor explodes in our face processing the first dummy, nothing
753 # we can do to recover
756 class RescueCorruptHoleGZAESTest (RescueCorruptHoleBaseTest):
758 PASSWORD = TEST_PASSWORD
759 # again, ignoring the crypto errors yields a bad zlib stream causing the
760 # decompressor to abort where the hole begins; the file is extracted up
761 # to this point though
764 ###############################################################################
766 ###############################################################################
768 class GenIndexIntactBaseTest (GenIndexTest):
779 class GenIndexIntactTest (GenIndexIntactBaseTest):
782 class GenIndexIntactGZTest (GenIndexIntactBaseTest):
786 class GenIndexIntactGZAESTest (GenIndexIntactBaseTest):
788 PASSWORD = TEST_PASSWORD