6 from functools import partial
8 import deltatar.deltatar as deltatar
9 import deltatar.crypto as crypto
10 import deltatar.tarfile as tarfile
12 from . import BaseTest
14 TEST_PASSWORD = "test1234"
17 VOLUME_OVERHEAD = 1.4 # account for tar overhead when fitting files into
18 # volumes; this is black magic
21 ###############################################################################
23 ###############################################################################
def flip_bits (fname, off, b=0x01, n=1):
    """
    Open file *fname* at offset *off*, replacing the next *n* bytes with
    their values xor'ed with *b*.

    :param fname: path of the file to modify in place
    :param off:   byte offset of the first byte to alter
    :param b:     mask xor'ed onto every affected byte; must fit in one byte
    :param n:     number of bytes to alter; fewer bytes are changed if the
                  file ends before *off* + *n*
    """
    fd = os.open (fname, os.O_RDWR)

    try:
        os.lseek (fd, off, os.SEEK_SET)
        chunk = os.read (fd, n)
        flipped = bytes (v ^ b for v in chunk)
        # rewind so the altered bytes overwrite the ones that were just read
        os.lseek (fd, off, os.SEEK_SET)
        os.write (fd, flipped)
    finally:
        # release the descriptor even if seeking, reading or writing failed
        os.close (fd)
44 def gz_header_size (fname, off=0):
46 Determine the length of the gzip header starting at *off* in file fname.
48 The header is variable length because it may contain the filename as NUL
51 # length so we need to determine where the actual payload starts
52 off = tarfile.GZ_HEADER_SIZE
53 fd = os.open (fname, os.O_RDONLY)
56 pos = os.lseek (fd, off, os.SEEK_SET)
58 while os.read (fd, 1)[0] != 0:
60 pos = os.lseek (fd, off, os.SEEK_SET)
68 def is_pdt_encrypted (fname):
70 Returns true if the file contains at least one PDT header plus enough
74 with open (fname, "rb") as st:
75 hdr = crypto.hdr_read_stream (st)
77 assert (len (st.read (siz)) == siz)
78 except Exception as exn:
83 ###############################################################################
84 ## corruption simulators ##
85 ###############################################################################
87 def corrupt_header (_, fname, compress, encrypt):
89 Modify a significant byte in the object header of the format.
91 if encrypt is True: # damage GCM tag
92 flip_bits (fname, crypto.HDR_OFF_TAG + 1)
93 elif compress is True: # invalidate magic
95 else: # Fudge checksum. From tar(5):
97 # struct header_gnu_tar {
106 flip_bits (fname, 100 + 8 + 8 + 8 + 12 + 12 + 1)
def corrupt_entire_header (_, fname, compress, encrypt):
    """
    Flip all bits in the first object header.

    The span that gets inverted depends on the archive flavor: encryption
    takes precedence over compression, and plain tar is the fallback.

    :param _:        unused; the function is stored as a class attribute and
                     thus receives the test instance first
    :param fname:    path of the volume to damage in place
    :param compress: True if the volume is gzip compressed
    :param encrypt:  True if the volume is encrypted
    """
    if encrypt is True: # invert crypto.PDTCRYPT_HDR_SIZE leading bytes
        flip_bits (fname, 0, 0xff, crypto.PDTCRYPT_HDR_SIZE)
    elif compress is True: # invert the gzip header, magic included
        flip_bits (fname, 0, 0xff, gz_header_size (fname))
    else: # invert the first tar block
        flip_bits (fname, 0, 0xff, tarfile.BLOCKSIZE)
def corrupt_payload_start (_, fname, compress, encrypt):
    """
    Modify the byte following the object header structure of the format.

    Flips the lowest bit of a single byte located one past the header length
    of the respective format. Encryption takes precedence over compression;
    plain tar is the fallback.

    :param _:        unused; the function is stored as a class attribute and
                     thus receives the test instance first
    :param fname:    path of the volume to damage in place
    :param compress: True if the volume is gzip compressed
    :param encrypt:  True if the volume is encrypted
    """
    if encrypt is True:
        flip_bits (fname, crypto.PDTCRYPT_HDR_SIZE + 1)
    elif compress is True:
        flip_bits (fname, gz_header_size (fname) + 1)
    else:
        flip_bits (fname, tarfile.BLOCKSIZE + 1)
def corrupt_trailing_data (_, fname, compress, encrypt):
    """
    Append 42 random bytes to the end of the file.

    The existing contents are left untouched; only trailing junk is added.

    :param _:        unused; the function is stored as a class attribute and
                     thus receives the test instance first
    :param fname:    path of the volume to damage in place
    :param compress: unused; kept for signature parity with the other
                     corruption helpers
    :param encrypt:  unused; kept for signature parity with the other
                     corruption helpers
    """
    junk = os.urandom (42)
    fd = os.open (fname, os.O_WRONLY | os.O_APPEND)
    try:
        os.write (fd, junk)
    finally:
        # release the descriptor even if the write failed
        os.close (fd)
def corrupt_volume (_, fname, compress, encrypt):
    """
    Zero out an entire volume.

    The file keeps its size; every byte is overwritten with NUL in chunks of
    at most TEST_BLOCKSIZE bytes.

    :param _:        unused; the function is stored as a class attribute and
                     thus receives the test instance first
    :param fname:    path of the volume to overwrite in place
    :param compress: unused; kept for signature parity with the other
                     corruption helpers
    :param encrypt:  unused; kept for signature parity with the other
                     corruption helpers
    """
    fd = os.open (fname, os.O_WRONLY)
    try:
        size = os.lseek (fd, 0, os.SEEK_END)
        assert os.lseek (fd, 0, os.SEEK_SET) == 0
        zeros = bytes (b'\x00' * TEST_BLOCKSIZE)
        while size > 0:
            todo = min (size, TEST_BLOCKSIZE)
            # account for what was actually written in case of a short write
            size -= os.write (fd, zeros [:todo])
    finally:
        # release the descriptor even if overwriting failed midway
        os.close (fd)
158 def corrupt_hole (_, fname, compress, encrypt):
160 Cut file in three pieces, reassemble without the middle one.
162 aname = os.path.abspath (fname)
163 infd = os.open (fname, os.O_RDONLY)
164 size = os.lseek (infd, 0, os.SEEK_END)
165 assert os.lseek (infd, 0, os.SEEK_SET) == 0
166 assert size > 3 * TEST_BLOCKSIZE
167 hole = (size / 3, size * 2 / 3)
168 outfd = os.open (os.path.dirname (aname), os.O_WRONLY | os.O_TMPFILE,
169 stat.S_IRUSR | stat.S_IWUSR)
171 zeros = bytes (b'\x00' * TEST_BLOCKSIZE)
174 data = os.read (infd, TEST_BLOCKSIZE)
175 if done < hole [0] or hole [1] < done:
176 # only copy from outside hole
177 os.write (outfd, data)
183 path = "/proc/self/fd/%d" % outfd
184 os.link (path, aname, src_dir_fd=0, follow_symlinks=True)
187 def immaculate (_, _fname, _compress, _encrypt):
193 ###############################################################################
195 ###############################################################################
197 class DefectiveTest (BaseTest):
199 Disaster recovery: restore corrupt backups.
204 FAILURES = 0 # files that could not be restored
205 MISMATCHES = 0 # files that were restored but corrupted
206 CORRUPT = corrupt_payload_start
208 MISSING = None # normally the number of failures
213 Create base test data
215 self.pwd = os.getcwd()
216 self.dst_path = "source_dir"
217 self.src_path = "%s2" % self.dst_path
220 os.system('rm -rf target_dir source_dir* backup_dir* huge')
221 os.makedirs (self.src_path)
225 self.hash [f] = self.create_file ("%s/%s"
226 % (self.src_path, f), 5 + i)
231 Remove temporary files created by unit tests and reset globals.
234 os.system("rm -rf source_dir source_dir2 backup_dir*")
def default_volume_name (backup_file, _x, _y, n, *a, **kwa):
    """
    Build a volume file name by interpolating *n* into the printf-style
    template *backup_file*.

    The remaining positional and keyword arguments are accepted but ignored.
    """
    volume_name = backup_file % n
    return volume_name
241 def gen_file_names (self, comp, pw):
242 bak_path = "backup_dir"
243 backup_file = "the_full_backup_%0.2d.tar"
244 backup_full = ("%s/%s" % (bak_path, backup_file)) % 0
245 index_file = "the_full_index"
247 if self.COMPRESSION is not None:
252 if self.PASSWORD is not None:
253 backup_file = "%s.%s" % (backup_file, deltatar.PDTCRYPT_EXTENSION)
254 backup_full = "%s.%s" % (backup_full, deltatar.PDTCRYPT_EXTENSION)
255 index_file = "%s.%s" % (index_file , deltatar.PDTCRYPT_EXTENSION)
257 return bak_path, backup_file, backup_full, index_file
260 class RecoverTest (DefectiveTest):
262 Recover: restore corrupt backups from index file information.
265 def test_recover_corrupt (self):
267 Perform various damaging actions that cause unreadable objects.
269 Expects the extraction to fail in normal mode. With disaster recovery,
270 extraction must succeed, and exactly one file must be missing.
272 mode = self.COMPRESSION or "#"
273 bak_path, backup_file, backup_full, index_file = \
274 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
277 # add n files for one nth the volume size each, corrected
278 # for metadata and tar block overhead
279 fsiz = int ( ( TEST_VOLSIZ
280 / (TEST_FILESPERVOL * VOLUME_OVERHEAD))
282 fcnt = (self.VOLUMES - 1) * TEST_FILESPERVOL
283 for i in range (fcnt):
284 nvol, invol = divmod(i, TEST_FILESPERVOL)
285 f = "dummy_vol_%d_n_%0.2d" % (nvol, invol)
286 self.hash [f] = self.create_file ("%s/%s"
287 % (self.src_path, f),
291 vname = partial (self.default_volume_name, backup_file)
292 dtar = deltatar.DeltaTar (mode=mode,
294 password=self.PASSWORD,
295 index_name_func=lambda _: index_file,
296 volume_name_func=vname)
298 dtar.create_full_backup \
299 (source_path=self.src_path, backup_path=bak_path,
302 if self.PASSWORD is not None:
303 # ensure all files are at least superficially in PDT format
304 for f in os.listdir (bak_path):
305 assert is_pdt_encrypted ("%s/%s" % (bak_path, f))
307 # first restore must succeed
308 dtar.restore_backup(target_path=self.dst_path,
309 backup_indexes_paths=[
310 "%s/%s" % (bak_path, index_file)
312 for key, value in self.hash.items ():
313 f = "%s/%s" % (self.dst_path, key)
314 assert os.path.exists (f)
315 assert value == self.md5sum (f)
316 shutil.rmtree (self.dst_path)
317 shutil.rmtree (self.src_path)
319 self.CORRUPT (backup_full,
320 self.COMPRESSION is not None,
321 self.PASSWORD is not None)
323 # normal restore must fail
325 dtar.restore_backup(target_path=self.dst_path,
326 backup_tar_path=backup_full)
327 except tarfile.CompressionError:
328 if self.PASSWORD is not None or self.COMPRESSION is not None:
332 except tarfile.ReadError:
333 # can happen with all three modes
335 except tarfile.DecryptionError:
336 if self.PASSWORD is not None:
341 os.chdir (self.pwd) # not restored due to the error above
342 # but recover will succeed
343 failed = dtar.recover_backup(target_path=self.dst_path,
344 backup_indexes_paths=[
345 "%s/%s" % (bak_path, index_file)
348 assert len (failed) == self.FAILURES
350 # with one file missing
353 for key, value in self.hash.items ():
354 kkey = "%s/%s" % (self.dst_path, key)
355 if os.path.exists (kkey):
356 if value != self.md5sum (kkey):
357 mismatch.append (key)
361 # usually, an object whose extraction fails will not be found on
362 # disk afterwards so the number of failures equals that of missing
363 # files. however, some modes will create partial files for objects
364 # spanning multiple volumes that contain the parts whose checksums
366 assert len (missing) == (self.MISSING if self.MISSING is not None
368 assert len (mismatch) == self.MISMATCHES
370 shutil.rmtree (self.dst_path)
373 class RescueTest (DefectiveTest):
375 Rescue: restore corrupt backups from backup set that is damaged to a degree
376 that the index file is worthless.
379 def test_rescue_corrupt (self):
381 Perform various damaging actions that cause unreadable objects, then
382 attempt to extract objects regardless.
384 mode = self.COMPRESSION or "#"
385 bak_path, backup_file, backup_full, index_file = \
386 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
389 # add n files for one nth the volume size each, corrected
390 # for metadata and tar block overhead
391 fsiz = int ( ( TEST_VOLSIZ
392 / (TEST_FILESPERVOL * VOLUME_OVERHEAD))
394 fcnt = (self.VOLUMES - 1) * TEST_FILESPERVOL
395 for i in range (fcnt):
396 nvol, invol = divmod(i, TEST_FILESPERVOL)
397 f = "dummy_vol_%d_n_%0.2d" % (nvol, invol)
398 self.hash [f] = self.create_file ("%s/%s"
399 % (self.src_path, f),
403 vname = partial (self.default_volume_name, backup_file)
404 dtar = deltatar.DeltaTar (mode=mode,
406 password=self.PASSWORD,
407 index_name_func=lambda _: index_file,
408 volume_name_func=vname)
410 dtar.create_full_backup \
411 (source_path=self.src_path, backup_path=bak_path,
414 if self.PASSWORD is not None:
415 # ensure all files are at least superficially in PDT format
416 for f in os.listdir (bak_path):
417 assert is_pdt_encrypted ("%s/%s" % (bak_path, f))
419 # first restore must succeed
420 dtar.restore_backup(target_path=self.dst_path,
421 backup_indexes_paths=[
422 "%s/%s" % (bak_path, index_file)
424 for key, value in self.hash.items ():
425 f = "%s/%s" % (self.dst_path, key)
426 assert os.path.exists (f)
427 assert value == self.md5sum (f)
428 shutil.rmtree (self.dst_path)
429 shutil.rmtree (self.src_path)
431 self.CORRUPT (backup_full,
432 self.COMPRESSION is not None,
433 self.PASSWORD is not None)
435 # normal restore must fail
437 dtar.restore_backup(target_path=self.dst_path,
438 backup_tar_path=backup_full)
439 except tarfile.CompressionError:
440 if self.PASSWORD is not None or self.COMPRESSION is not None:
444 except tarfile.ReadError:
445 # can happen with all three modes
447 except tarfile.DecryptionError:
448 if self.PASSWORD is not None:
453 os.chdir (self.pwd) # not restored due to the error above
454 # but rescue will succeed
455 failed = dtar.rescue_backup(target_path=self.dst_path,
456 backup_tar_path=backup_full)
458 assert len (failed) == self.FAILURES
460 # with one file missing
463 for key, value in self.hash.items ():
464 kkey = "%s/%s" % (self.dst_path, key)
465 if os.path.exists (kkey):
466 if value != self.md5sum (kkey):
467 mismatch.append (key)
471 assert len (missing) == (self.MISSING if self.MISSING is not None
473 assert len (mismatch) == self.MISMATCHES
475 shutil.rmtree (self.dst_path)
478 class GenIndexTest (DefectiveTest):
480 Deducing an index for a backup with tarfile.
483 def test_gen_index (self):
485 Create backup, leave it unharmed, then generate an index.
487 mode = self.COMPRESSION or "#"
488 bak_path, backup_file, backup_full, index_file = \
489 self.gen_file_names (self.COMPRESSION, self.PASSWORD)
491 vname = partial (self.default_volume_name, backup_file)
492 dtar = deltatar.DeltaTar (mode=mode,
494 password=self.PASSWORD,
495 index_name_func=lambda _: index_file,
496 volume_name_func=vname)
498 dtar.create_full_backup \
499 (source_path=self.src_path, backup_path=bak_path,
502 psidx = tarfile.gen_rescue_index (backup_full, mode, password=self.PASSWORD)
504 assert len (psidx) == len (self.hash)
507 ###############################################################################
509 ###############################################################################
511 class RecoverCorruptPayloadTestBase (RecoverTest):
514 FAILURES = 0 # tarfile will restore but corrupted, as
515 MISMATCHES = 1 # revealed by the hash
517 class RecoverCorruptPayloadSingleTest (RecoverCorruptPayloadTestBase):
520 class RecoverCorruptPayloadMultiTest (RecoverCorruptPayloadTestBase):
524 class RecoverCorruptPayloadGZTestBase (RecoverTest):
530 class RecoverCorruptPayloadGZSingleTest (RecoverCorruptPayloadGZTestBase):
533 class RecoverCorruptPayloadGZMultiTest (RecoverCorruptPayloadGZTestBase):
537 class RecoverCorruptPayloadGZAESTestBase (RecoverTest):
539 PASSWORD = TEST_PASSWORD
543 class RecoverCorruptPayloadGZAESSingleTest (RecoverCorruptPayloadGZAESTestBase):
546 class RecoverCorruptPayloadGZAESMultiTest (RecoverCorruptPayloadGZAESTestBase):
550 class RecoverCorruptHeaderTestBase (RecoverTest):
554 CORRUPT = corrupt_header
557 class RecoverCorruptHeaderSingleTest (RecoverCorruptHeaderTestBase):
560 class RecoverCorruptHeaderMultiTest (RecoverCorruptHeaderTestBase):
564 class RecoverCorruptHeaderGZTestBase (RecoverTest):
568 CORRUPT = corrupt_header
571 class RecoverCorruptHeaderGZSingleTest (RecoverCorruptHeaderGZTestBase):
574 class RecoverCorruptHeaderGZMultiTest (RecoverCorruptHeaderGZTestBase):
578 class RecoverCorruptHeaderGZAESTestBase (RecoverTest):
580 PASSWORD = TEST_PASSWORD
582 CORRUPT = corrupt_header
585 class RecoverCorruptHeaderGZAESSingleTest (RecoverCorruptHeaderGZAESTestBase):
588 class RecoverCorruptHeaderGZAESMultiTest (RecoverCorruptHeaderGZAESTestBase):
592 class RecoverCorruptEntireHeaderTestBase (RecoverTest):
596 CORRUPT = corrupt_entire_header
599 class RecoverCorruptEntireHeaderSingleTest (RecoverCorruptEntireHeaderTestBase):
602 class RecoverCorruptEntireHeaderMultiTest (RecoverCorruptEntireHeaderTestBase):
606 class RecoverCorruptEntireHeaderGZTestBase (RecoverTest):
610 CORRUPT = corrupt_entire_header
613 class RecoverCorruptEntireHeaderGZSingleTest (RecoverCorruptEntireHeaderGZTestBase):
616 class RecoverCorruptEntireHeaderGZMultiTest (RecoverCorruptEntireHeaderGZTestBase):
620 class RecoverCorruptEntireHeaderGZAESTestBase (RecoverTest):
622 PASSWORD = TEST_PASSWORD
624 CORRUPT = corrupt_entire_header
627 class RecoverCorruptEntireHeaderGZAESSingleTest (RecoverCorruptEntireHeaderGZAESTestBase):
630 class RecoverCorruptEntireHeaderGZAESMultiTest (RecoverCorruptEntireHeaderGZAESTestBase):
634 class RecoverCorruptTrailingDataTestBase (RecoverTest):
635 # plain Tar is indifferent to trailing data and the results
640 CORRUPT = corrupt_trailing_data
643 class RecoverCorruptTrailingDataSingleTest (RecoverCorruptTrailingDataTestBase):
646 class RecoverCorruptTrailingDataMultiTest (RecoverCorruptTrailingDataTestBase):
647 # the last object in first archive has extra bytes somewhere in the
648 # middle because tar itself performs no data checksumming.
653 class RecoverCorruptTrailingDataGZTestBase (RecoverTest):
654 # reading past the final object will cause decompression failure;
655 # all objects except for the last survive unharmed though
659 CORRUPT = corrupt_trailing_data
662 class RecoverCorruptTrailingDataGZSingleTest (RecoverCorruptTrailingDataGZTestBase):
665 class RecoverCorruptTrailingDataGZMultiTest (RecoverCorruptTrailingDataGZTestBase):
667 # the last file of the first volume will only contain the data of the
668 # second part which is contained in the second volume. this happens
669 # because the CRC32 is wrong for the first part so it gets discarded, then
670 # the object is recreated from the first header of the second volume,
671 # containing only the remainder of the data.
676 class RecoverCorruptTrailingDataGZAESTestBase (RecoverTest):
678 PASSWORD = TEST_PASSWORD
680 CORRUPT = corrupt_trailing_data
683 class RecoverCorruptTrailingDataGZAESSingleTest (RecoverCorruptTrailingDataGZAESTestBase):
686 class RecoverCorruptTrailingDataGZAESMultiTest (RecoverCorruptTrailingDataGZAESTestBase):
690 class RecoverCorruptVolumeBaseTest (RecoverTest):
694 CORRUPT = corrupt_volume
697 class RecoverCorruptVolumeTest (RecoverCorruptVolumeBaseTest):
700 class RecoverCorruptVolumeGZTest (RecoverCorruptVolumeBaseTest):
703 class RecoverCorruptVolumeGZAESTest (RecoverCorruptVolumeBaseTest):
705 PASSWORD = TEST_PASSWORD
708 class RecoverCorruptHoleBaseTest (RecoverTest):
710 Cut bytes from the middle of a volume.
712 Index-based recovery works only up to the hole.
717 CORRUPT = corrupt_hole
718 VOLUMES = 2 # request two vols to swell up the first one
721 class RecoverCorruptHoleTest (RecoverCorruptHoleBaseTest):
724 class RecoverCorruptHoleGZTest (RecoverCorruptHoleBaseTest):
728 class RecoverCorruptHoleGZAESTest (RecoverCorruptHoleBaseTest):
730 PASSWORD = TEST_PASSWORD
733 ###############################################################################
735 ###############################################################################
737 class RescueCorruptHoleBaseTest (RescueTest):
739 Cut bytes from the middle of a volume.
744 CORRUPT = corrupt_hole
745 VOLUMES = 2 # request two vols to swell up the first one
748 class RescueCorruptHoleTest (RescueCorruptHoleBaseTest):
751 class RescueCorruptHoleGZTest (RescueCorruptHoleBaseTest):
755 class RescueCorruptHoleGZAESTest (RescueCorruptHoleBaseTest):
757 PASSWORD = TEST_PASSWORD
760 ###############################################################################
762 ###############################################################################
764 class GenIndexIntactBaseTest (GenIndexTest):
775 class GenIndexIntactTest (GenIndexIntactBaseTest):
778 class GenIndexIntactGZTest (GenIndexIntactBaseTest):
782 class GenIndexIntactGZAESTest (GenIndexIntactBaseTest):
784 PASSWORD = TEST_PASSWORD