developer.intra2net.com Git - python-delta-tar/blob - testing/test_recover.py

   1 import logging
   2 import os
   3 import shutil
   4
   5 import deltatar.deltatar as deltatar
   6 import deltatar.crypto   as crypto
   7 import deltatar.tarfile  as tarfile
   8
   9 from . import BaseTest
  10
  11 TEST_PASSWORD     = "test1234"
  12 TEST_VOLSIZ       = 2 # MB
  13 TEST_FILESPERVOL  = 3
  14 VOLUME_OVERHEAD   = 1.4 # account for tar overhead when fitting files into
  15                         # volumes; this is black magic
  16 TEST_BLOCKSIZE    = 4096
  17
  18 ###############################################################################
  19 ## helpers                                                                   ##
  20 ###############################################################################
  21
  22 def flip_bits (fname, off, b=0x01, n=1):
  23     """
  24     Open file *fname* at offset *off*, replacing the next *n* bytes with
  25     their values xor’ed with *b*.
  26     """
  27     fd = os.open (fname, os.O_RDWR)
  28
  29     try:
  30         pos = os.lseek (fd, off, os.SEEK_SET)
  31         assert pos == off
  32         chunk = os.read (fd, n)
  33         chunk = bytes (map (lambda v: v ^ b, chunk))
  34         pos = os.lseek (fd, off, os.SEEK_SET)
  35         assert pos == off
  36         os.write (fd, chunk)
  37     finally:
  38         os.close (fd)
  39
  40
  41 def gz_header_size (fname, off=0):
  42     """
  43     Determine the length of the gzip header starting at *off* in file fname.
  44
  45     The header is variable length because it may contain the filename as NUL
  46     terminated bytes.
  47     """
  48     # length so we need to determine where the actual payload starts
  49     off = tarfile.GZ_HEADER_SIZE
  50     fd = os.open (fname, os.O_RDONLY)
  51
  52     try:
  53         pos = os.lseek (fd, off, os.SEEK_SET)
  54         assert pos == off
  55         while os.read (fd, 1)[0] != 0:
  56             off += 1
  57             pos = os.lseek (fd, off, os.SEEK_SET)
  58             assert pos == off
  59     finally:
  60         os.close (fd)
  61
  62     return off
  63
  64
  65 def is_pdt_encrypted (fname):
  66     """
  67     Returns true if the file contains at least one PDT header plus enough
  68     space for the object.
  69     """
  70     try:
  71         with open (fname, "rb") as st:
  72             hdr = crypto.hdr_read_stream (st)
  73             siz = hdr ["ctsize"]
  74             assert (len (st.read (siz)) == siz)
  75     except Exception as exn:
  76         return False
  77     return True
  78
  79
  80 def corrupt_header (_, fname, compress, encrypt):
  81     """
  82     Modify a significant byte in the object header of the format.
  83     """
  84     if encrypt is True: # damage GCM tag
  85         flip_bits (fname, crypto.HDR_OFF_TAG + 1)
  86     elif compress is True: # invalidate magic
  87         flip_bits (fname, 1)
  88     else: # Fudge checksum. From tar(5):
  89         #
  90         #       struct header_gnu_tar {
  91         #               char name[100];
  92         #               char mode[8];
  93         #               char uid[8];
  94         #               char gid[8];
  95         #               char size[12];
  96         #               char mtime[12];
  97         #               char checksum[8];
  98         #               …
  99         flip_bits (fname, 100 + 8 + 8 + 8 + 12 + 12 + 1)
 100
 101
 102 def corrupt_entire_header (_, fname, compress, encrypt):
 103     """
 104     Flip all bits in the first object header.
 105     """
 106     if encrypt is True:
 107         flip_bits (fname, 0, 0xff, crypto.PDTCRYPT_HDR_SIZE)
 108     elif compress is True: # invalidate magic
 109         flip_bits (fname, 0, 0xff, gz_header_size (fname))
 110     else:
 111         flip_bits (fname, 0, 0xff, tarfile.BLOCKSIZE)
 112
 113
 114 def corrupt_payload_start (_, fname, compress, encrypt):
 115     """
 116     Modify the byte following the object header structure of the format.
 117     """
 118     if encrypt is True:
 119         flip_bits (fname, crypto.PDTCRYPT_HDR_SIZE + 1)
 120     elif compress is True:
 121         flip_bits (fname, gz_header_size (fname) + 1)
 122     else:
 123         flip_bits (fname, tarfile.BLOCKSIZE + 1)
 124
 125
 126 def corrupt_trailing_data (_, fname, compress, encrypt):
 127     """
 128     Modify the byte following the object header structure of the format.
 129     """
 130     junk = os.urandom (42)
 131     fd = os.open (fname, os.O_WRONLY | os.O_APPEND)
 132     os.write (fd, junk)
 133     os.close (fd)
 134
 135
 136 def corrupt_volume (_, fname, compress, encrypt):
 137     """
 138     Zero out an entire volume.
 139     """
 140     fd = os.open (fname, os.O_WRONLY)
 141     size = os.lseek (fd, 0, os.SEEK_END)
 142     assert os.lseek (fd, 0, os.SEEK_SET) == 0
 143     zeros = bytes (b'\x00' * TEST_BLOCKSIZE)
 144     while size > 0:
 145         todo = min (size, TEST_BLOCKSIZE)
 146         os.write (fd, zeros [:todo])
 147         size -= todo
 148     os.close (fd)
 149
 150
 151 ###############################################################################
 152 ## tests                                                                     ##
 153 ###############################################################################
 154
 155 class RecoverTest (BaseTest):
 156     """
 157     Disaster recovery: restore corrupt backups.
 158     """
 159
 160     COMPRESSION = None
 161     PASSWORD    = None
 162     FAILURES    = 0     # files that could not be restored
 163     MISMATCHES  = 0     # files that were restored but corrupted
 164     CORRUPT     = corrupt_payload_start
 165     VOLUMES     = 1
 166     MISSING     = None  # normally the number of failures
 167
 168
 169     def setUp(self):
 170         '''
 171         Create base test data
 172         '''
 173         self.pwd      = os.getcwd()
 174         self.dst_path = "source_dir"
 175         self.src_path = "%s2" % self.dst_path
 176         self.hash     = dict()
 177
 178         os.system('rm -rf target_dir source_dir* backup_dir* huge')
 179         os.makedirs (self.src_path)
 180
 181         for i in range (5):
 182             f = "dummy_%d" % i
 183             self.hash [f] = self.create_file ("%s/%s"
 184                                               % (self.src_path, f), 5 + i)
 185
 186
 187     def tearDown(self):
 188         '''
 189         Remove temporal files created by unit tests and reset globals.
 190         '''
 191         os.chdir(self.pwd)
 192         os.system("rm -rf source_dir source_dir2 backup_dir*")
 193
 194
 195     def test_recover_corrupt (self):
 196         """
 197         Perform various damaging actions that cause unreadable objects.
 198
 199         Expects the extraction to fail in normal mode. With disaster recovery,
 200         extraction must succeed, and exactly one file must be missing.
 201         """
 202         mode           = self.COMPRESSION or "#"
 203         bak_path       = "backup_dir"
 204         backup_file    = "the_full_backup_%0.2d.tar"
 205         backup_full    = ("%s/%s" % (bak_path, backup_file)) % 0
 206         index_file     = "the_full_index"
 207
 208         if self.COMPRESSION is not None:
 209             backup_file += ".gz"
 210             backup_full += ".gz"
 211             index_file  += ".gz"
 212
 213         if self.PASSWORD is not None:
 214             backup_file = "%s.%s" % (backup_file, deltatar.PDTCRYPT_EXTENSION)
 215             backup_full = "%s.%s" % (backup_full, deltatar.PDTCRYPT_EXTENSION)
 216             index_file  = "%s.%s" % (index_file , deltatar.PDTCRYPT_EXTENSION)
 217
 218         if self.VOLUMES > 1:
 219             # add n files for one nth the volume size each, corrected
 220             # for metadata and tar block overhead
 221             fsiz = int (  (  TEST_VOLSIZ
 222                            / (TEST_FILESPERVOL * VOLUME_OVERHEAD))
 223                         * 1024 * 1024)
 224             fcnt = (self.VOLUMES - 1) * TEST_FILESPERVOL
 225             for i in range (fcnt):
 226                 nvol, invol = divmod(i, TEST_FILESPERVOL)
 227                 f = "dummy_vol_%d_n_%0.2d" % (nvol, invol)
 228                 self.hash [f] = self.create_file ("%s/%s"
 229                                                   % (self.src_path, f),
 230                                                   fsiz,
 231                                                   random=True)
 232
 233         def vname (_x, _y, n, *a, **kwa):
 234             return backup_file % n
 235
 236         dtar = deltatar.DeltaTar (mode=mode,
 237                                   logger=None,
 238                                   password=self.PASSWORD,
 239                                   index_name_func=lambda _: index_file,
 240                                   volume_name_func=vname)
 241
 242         dtar.create_full_backup \
 243             (source_path=self.src_path, backup_path=bak_path,
 244              max_volume_size=1)
 245
 246         if self.PASSWORD is not None:
 247             # ensure all files are at least superficially in PDT format
 248             for f in os.listdir (bak_path):
 249                 assert is_pdt_encrypted ("%s/%s" % (bak_path, f))
 250
 251         # first restore must succeed
 252         dtar.restore_backup(target_path=self.dst_path,
 253                             backup_indexes_paths=[
 254                                 "%s/%s" % (bak_path, index_file)
 255                             ])
 256         for key, value in self.hash.items ():
 257             f = "%s/%s" % (self.dst_path, key)
 258             assert os.path.exists (f)
 259             assert value == self.md5sum (f)
 260         shutil.rmtree (self.dst_path)
 261         shutil.rmtree (self.src_path)
 262
 263         self.CORRUPT (backup_full,
 264                       self.COMPRESSION is not None,
 265                       self.PASSWORD    is not None)
 266
 267         # normal restore must fail
 268         try:
 269             dtar.restore_backup(target_path=self.dst_path,
 270                                 backup_tar_path=backup_full)
 271         except tarfile.CompressionError:
 272             if self.PASSWORD is not None or self.COMPRESSION is not None:
 273                 pass
 274             else:
 275                 raise
 276         except tarfile.ReadError:
 277             # can happen with all three modes
 278             pass
 279         except tarfile.DecryptionError:
 280             if self.PASSWORD is not None:
 281                 pass
 282             else:
 283                 raise
 284
 285         os.chdir (self.pwd) # not restored due to the error above
 286         # but recover will succeed
 287         failed = dtar.recover_backup(target_path=self.dst_path,
 288                                      backup_indexes_paths=[
 289                                          "%s/%s" % (bak_path, index_file)
 290                                      ])
 291
 292         assert len (failed) == self.FAILURES
 293
 294         # with one file missing
 295         missing  = []
 296         mismatch = []
 297         for key, value in self.hash.items ():
 298             kkey = "%s/%s" % (self.dst_path, key)
 299             if os.path.exists (kkey):
 300                 if value != self.md5sum (kkey):
 301                     mismatch.append (key)
 302             else:
 303                 missing.append (key)
 304
 305         # usually, an object whose extraction fails will not be found on
 306         # disk afterwards so the number of failures equals that of missing
 307         # files. however, some modes will create partial files for objects
 308         # spanning multiple volumes that contain the parts whose checksums
 309         # were valid.
 310         assert len (missing)  == (self.MISSING if self.MISSING is not None
 311                                                else self.FAILURES)
 312         assert len (mismatch) == self.MISMATCHES
 313
 314         shutil.rmtree (self.dst_path)
 315
 316
 317 class RecoverCorruptPayloadTestBase (RecoverTest):
 318     COMPRESSION = None
 319     PASSWORD    = None
 320     FAILURES    = 0 # tarfile will restore but corrupted, as
 321     MISMATCHES  = 1 # revealed by the hash
 322
 323 class RecoverCorruptPayloadSingleTest (RecoverCorruptPayloadTestBase):
 324     VOLUMES     = 1
 325
 326 class RecoverCorruptPayloadMultiTest (RecoverCorruptPayloadTestBase):
 327     VOLUMES     = 3
 328
 329
 330 class RecoverCorruptPayloadGZTestBase (RecoverTest):
 331     COMPRESSION = "#gz"
 332     PASSWORD    = None
 333     FAILURES    = 1
 334     MISMATCHES  = 0
 335
 336 class RecoverCorruptPayloadGZSingleTest (RecoverCorruptPayloadGZTestBase):
 337     VOLUMES     = 1
 338
 339 class RecoverCorruptPayloadGZMultiTest (RecoverCorruptPayloadGZTestBase):
 340     VOLUMES     = 3
 341
 342
 343 class RecoverCorruptPayloadGZAESTestBase (RecoverTest):
 344     COMPRESSION = "#gz"
 345     PASSWORD    = TEST_PASSWORD
 346     FAILURES    = 1
 347     MISMATCHES  = 0
 348
 349 class RecoverCorruptPayloadGZAESSingleTest (RecoverCorruptPayloadGZAESTestBase):
 350     VOLUMES     = 1
 351
 352 class RecoverCorruptPayloadGZAESMultiTest (RecoverCorruptPayloadGZAESTestBase):
 353     VOLUMES     = 3
 354
 355
 356 class RecoverCorruptHeaderTestBase (RecoverTest):
 357     COMPRESSION = None
 358     PASSWORD    = None
 359     FAILURES    = 1
 360     CORRUPT     = corrupt_header
 361     MISMATCHES  = 0
 362
 363 class RecoverCorruptHeaderSingleTest (RecoverCorruptHeaderTestBase):
 364     VOLUMES     = 1
 365
 366 class RecoverCorruptHeaderMultiTest (RecoverCorruptHeaderTestBase):
 367     VOLUMES     = 3
 368
 369
 370 class RecoverCorruptHeaderGZTestBase (RecoverTest):
 371     COMPRESSION = "#gz"
 372     PASSWORD    = None
 373     FAILURES    = 1
 374     CORRUPT     = corrupt_header
 375     MISMATCHES  = 0
 376
 377 class RecoverCorruptHeaderGZSingleTest (RecoverCorruptHeaderGZTestBase):
 378     VOLUMES     = 1
 379
 380 class RecoverCorruptHeaderGZMultiTest (RecoverCorruptHeaderGZTestBase):
 381     VOLUMES     = 3
 382
 383
 384 class RecoverCorruptHeaderGZAESTestBase (RecoverTest):
 385     COMPRESSION = "#gz"
 386     PASSWORD    = TEST_PASSWORD
 387     FAILURES    = 1
 388     CORRUPT     = corrupt_header
 389     MISMATCHES  = 0
 390
 391 class RecoverCorruptHeaderGZAESSingleTest (RecoverCorruptHeaderGZAESTestBase):
 392     VOLUMES     = 1
 393
 394 class RecoverCorruptHeaderGZAESMultiTest (RecoverCorruptHeaderGZAESTestBase):
 395     VOLUMES     = 3
 396
 397
 398 class RecoverCorruptEntireHeaderTestBase (RecoverTest):
 399     COMPRESSION = None
 400     PASSWORD    = None
 401     FAILURES    = 1
 402     CORRUPT     = corrupt_entire_header
 403     MISMATCHES  = 0
 404
 405 class RecoverCorruptEntireHeaderSingleTest (RecoverCorruptEntireHeaderTestBase):
 406     VOLUMES     = 1
 407
 408 class RecoverCorruptEntireHeaderMultiTest (RecoverCorruptEntireHeaderTestBase):
 409     VOLUMES     = 3
 410
 411
 412 class RecoverCorruptEntireHeaderGZTestBase (RecoverTest):
 413     COMPRESSION = "#gz"
 414     PASSWORD    = None
 415     FAILURES    = 1
 416     CORRUPT     = corrupt_entire_header
 417     MISMATCHES  = 0
 418
 419 class RecoverCorruptEntireHeaderGZSingleTest (RecoverCorruptEntireHeaderGZTestBase):
 420     VOLUMES     = 1
 421
 422 class RecoverCorruptEntireHeaderGZMultiTest (RecoverCorruptEntireHeaderGZTestBase):
 423     VOLUMES     = 3
 424
 425
 426 class RecoverCorruptEntireHeaderGZAESTestBase (RecoverTest):
 427     COMPRESSION = "#gz"
 428     PASSWORD    = TEST_PASSWORD
 429     FAILURES    = 1
 430     CORRUPT     = corrupt_entire_header
 431     MISMATCHES  = 0
 432
 433 class RecoverCorruptEntireHeaderGZAESSingleTest (RecoverCorruptEntireHeaderGZAESTestBase):
 434     VOLUMES     = 1
 435
 436 class RecoverCorruptEntireHeaderGZAESMultiTest (RecoverCorruptEntireHeaderGZAESTestBase):
 437     VOLUMES     = 3
 438
 439
 440 class RecoverCorruptTrailingDataTestBase (RecoverTest):
 441     # plain Tar is indifferent against traling data and the results
 442     # are consistent
 443     COMPRESSION = None
 444     PASSWORD    = None
 445     FAILURES    = 0
 446     CORRUPT     = corrupt_trailing_data
 447     MISMATCHES  = 0
 448
 449 class RecoverCorruptTrailingDataSingleTest (RecoverCorruptTrailingDataTestBase):
 450     VOLUMES     = 1
 451
 452 class RecoverCorruptTrailingDataMultiTest (RecoverCorruptTrailingDataTestBase):
 453     # the last object in first archive has extra bytes somewhere in the
 454     # middle because tar itself performs no data checksumming.
 455     MISMATCHES  = 1
 456     VOLUMES     = 3
 457
 458
 459 class RecoverCorruptTrailingDataGZTestBase (RecoverTest):
 460     # reading past the final object will cause decompression failure;
 461     # all objects except for the last survive unharmed though
 462     COMPRESSION = "#gz"
 463     PASSWORD    = None
 464     FAILURES    = 1
 465     CORRUPT     = corrupt_trailing_data
 466     MISMATCHES  = 0
 467
 468 class RecoverCorruptTrailingDataGZSingleTest (RecoverCorruptTrailingDataGZTestBase):
 469     VOLUMES     = 1
 470
 471 class RecoverCorruptTrailingDataGZMultiTest (RecoverCorruptTrailingDataGZTestBase):
 472     VOLUMES     = 3
 473     # the last file of the first volume will only contain the data of the
 474     # second part which is contained in the second volume. this happens
 475     # because the CRC32 is wrong for the first part so it gets discarded, then
 476     # the object is recreated from the first header of the second volume,
 477     # containing only the remainder of the data.
 478     MISMATCHES  = 1
 479     MISSING     = 0
 480
 481
 482 class RecoverCorruptTrailingDataGZAESTestBase (RecoverTest):
 483     COMPRESSION = "#gz"
 484     PASSWORD    = TEST_PASSWORD
 485     FAILURES    = 0
 486     CORRUPT     = corrupt_trailing_data
 487     MISMATCHES  = 0
 488
 489 class RecoverCorruptTrailingDataGZAESSingleTest (RecoverCorruptTrailingDataGZAESTestBase):
 490     VOLUMES     = 1
 491
 492 class RecoverCorruptTrailingDataGZAESMultiTest (RecoverCorruptTrailingDataGZAESTestBase):
 493     VOLUMES     = 3
 494
 495
 496 class RecoverCorruptVolumeBaseTest (RecoverTest):
 497     COMPRESSION = None
 498     PASSWORD    = None
 499     FAILURES    = 8
 500     CORRUPT     = corrupt_volume
 501     VOLUMES     = 3
 502
 503 class RecoverCorruptVolumeTest (RecoverCorruptVolumeBaseTest):
 504     pass
 505
 506 class RecoverCorruptVolumeGZTest (RecoverTest):
 507     COMPRESSION = "#gz"
 508     PASSWORD    = None
 509     FAILURES    = 8
 510     CORRUPT     = corrupt_volume
 511     VOLUMES     = 3
 512
 513 class RecoverCorruptVolumeGZAESTest (RecoverTest):
 514     COMPRESSION = "#gz"
 515     PASSWORD    = TEST_PASSWORD
 516     FAILURES    = 8
 517     CORRUPT     = corrupt_volume
 518     VOLUMES     = 3
 519