5 import deltatar.deltatar as deltatar
6 import deltatar.crypto as crypto
7 import deltatar.tarfile as tarfile
# Password assigned to PASSWORD by the encrypted (*GZAES*) test variants.
11 TEST_PASSWORD = "test1234"
# Fudge factor used as a divisor when sizing the dummy files that fill a
# volume (see the fsiz computation in RecoverTest.test_recover_corrupt).
14 VOLUME_OVERHEAD = 1.4 # account for tar overhead when fitting files into
15 # volumes; this is black magic
18 ###############################################################################
20 ###############################################################################
def flip_bits (fname, off, b=0x01, n=1):
    """
    Open file *fname* at offset *off*, replacing the next *n* bytes with
    their values XORed with *b*.

    :param fname: path of the file to modify in place
    :param off:   absolute byte offset of the first byte to change
    :param b:     mask (0..255) XORed onto every affected byte
    :param n:     number of bytes to change; if fewer than *n* bytes exist
                  past *off*, only the existing ones are changed and the
                  file size stays the same
    """
    fd = os.open (fname, os.O_RDWR)
    try:
        os.lseek (fd, off, os.SEEK_SET)
        chunk = os.read (fd, n)
        flipped = bytes (v ^ b for v in chunk)
        # reading advanced the file position; rewind before writing back
        os.lseek (fd, off, os.SEEK_SET)
        os.write (fd, flipped)
    finally:
        # release the descriptor even if seeking, reading or writing failed
        os.close (fd)
def gz_header_size (fname, off=0):
    """
    Determine the length of the gzip header starting at *off* in file fname.

    The header is variable length because it may contain the filename as NUL
    terminated bytes, so the fixed-size part (``tarfile.GZ_HEADER_SIZE``) is
    skipped and the file is scanned for the terminating NUL.

    :param fname: path of the file to inspect
    :param off:   offset at which the gzip header begins
    :returns:     distance from *off* to the NUL byte that terminates the
                  variable part of the header
    :raises IndexError: if the file ends before a NUL byte is found
    """
    # Honor the caller's offset instead of overwriting it; with the default
    # off=0 the scan starts exactly where it always did.
    pos = off + tarfile.GZ_HEADER_SIZE
    with open (fname, "rb") as st:
        st.seek (pos)
        while True:
            byte = st.read (1) # buffered stream: no per-byte syscalls
            if not byte:
                raise IndexError ("%s: end of file while looking for the "
                                  "end of the gzip header" % fname)
            if byte == b"\x00":
                break
            pos += 1
    return pos - off
def is_pdt_encrypted (fname):
    """
    Returns true if the file contains at least one PDT header plus enough
    bytes after it to hold the object that header announces.

    :param fname: path of the file to inspect
    :returns:     True if a header could be read and the announced number
                  of bytes follows it, False otherwise (including any error
                  while opening or parsing the file)
    """
    try:
        with open (fname, "rb") as st:
            hdr = crypto.hdr_read_stream (st)
            # NOTE(review): assumes the parsed header exposes the ciphertext
            # size under the "ctsize" key -- confirm against crypto.
            siz = hdr ["ctsize"]
            # explicit comparison instead of ``assert`` so the check is not
            # stripped when running under ``python -O``
            return len (st.read (siz)) == siz
    except Exception:
        # best effort: anything that prevents reading a header means the
        # file is not (validly) PDT encrypted
        return False
def corrupt_header (_, fname, compress, encrypt):
    """
    Modify a significant byte in the object header of the format.

    :param _:        ignored; receives the test instance when the function
                     is invoked through the ``CORRUPT`` class attribute
    :param fname:    path of the volume to damage
    :param compress: True if the volume is gzip compressed
    :param encrypt:  True if the volume is encrypted; takes precedence over
                     *compress*
    """
    if encrypt is True: # damage GCM tag
        flip_bits (fname, crypto.HDR_OFF_TAG + 1)
    elif compress is True: # invalidate magic
        # NOTE(review): the original statement for this branch is not visible;
        # offset 1 targets the second of the two gzip magic bytes -- confirm.
        flip_bits (fname, 1)
    else: # Fudge checksum. From tar(5):
        #
        #       struct header_gnu_tar {
        #               char name[100];
        #               char mode[8];
        #               char uid[8];
        #               char gid[8];
        #               char size[12];
        #               char mtime[12];
        #               char checksum[8];
        #               ...
        #
        # i.e. the offset below is the second byte of the checksum field.
        flip_bits (fname, 100 + 8 + 8 + 8 + 12 + 12 + 1)
def corrupt_entire_header (_, fname, compress, encrypt):
    """
    Flip all bits in the first object header.

    The number of bytes inverted depends on the outermost format layer:
    the PDT crypto header, the gzip header, or one tar block.

    :param _:        ignored; receives the test instance when the function
                     is invoked through the ``CORRUPT`` class attribute
    :param fname:    path of the volume to damage
    :param compress: True if the volume is gzip compressed
    :param encrypt:  True if the volume is encrypted; takes precedence over
                     *compress*
    """
    if encrypt is True:
        flip_bits (fname, 0, 0xff, crypto.PDTCRYPT_HDR_SIZE)
    elif compress is True: # invalidate magic
        flip_bits (fname, 0, 0xff, gz_header_size (fname))
    else:
        flip_bits (fname, 0, 0xff, tarfile.BLOCKSIZE)
def corrupt_payload_start (_, fname, compress, encrypt):
    """
    Modify the byte following the object header structure of the format.

    Flips the lowest bit of the byte at "header size + 1", where the header
    size is that of the outermost format layer (PDT crypto header, gzip
    header, or one tar block).

    :param _:        ignored; receives the test instance when the function
                     is invoked through the ``CORRUPT`` class attribute
    :param fname:    path of the volume to damage
    :param compress: True if the volume is gzip compressed
    :param encrypt:  True if the volume is encrypted; takes precedence over
                     *compress*
    """
    if encrypt is True:
        flip_bits (fname, crypto.PDTCRYPT_HDR_SIZE + 1)
    elif compress is True:
        flip_bits (fname, gz_header_size (fname) + 1)
    else:
        flip_bits (fname, tarfile.BLOCKSIZE + 1)
def corrupt_trailing_data (_, fname, compress, encrypt):
    """
    Append 42 bytes of random junk to the end of the file.

    :param _:        ignored; receives the test instance when the function
                     is invoked through the ``CORRUPT`` class attribute
    :param fname:    path of the volume to damage
    :param compress: unused; accepted for signature parity with the other
                     corruption helpers
    :param encrypt:  unused; accepted for signature parity with the other
                     corruption helpers
    """
    junk = os.urandom (42)
    fd = os.open (fname, os.O_WRONLY | os.O_APPEND)
    try:
        os.write (fd, junk)
    finally:
        os.close (fd)
def corrupt_volume (_, fname, compress, encrypt):
    """
    Zero out an entire volume.

    The file keeps its size; every byte is overwritten with NUL, written in
    chunks of at most ``TEST_BLOCKSIZE`` bytes.

    :param _:        ignored; receives the test instance when the function
                     is invoked through the ``CORRUPT`` class attribute
    :param fname:    path of the volume to wipe
    :param compress: unused; accepted for signature parity with the other
                     corruption helpers
    :param encrypt:  unused; accepted for signature parity with the other
                     corruption helpers
    """
    fd = os.open (fname, os.O_WRONLY)
    try:
        size = os.lseek (fd, 0, os.SEEK_END)
        # rewind outside of any ``assert``: under ``python -O`` an asserted
        # seek would be dropped and the zeros appended instead
        os.lseek (fd, 0, os.SEEK_SET)
        zeros = bytes (TEST_BLOCKSIZE)
        while size > 0:
            todo = min (size, TEST_BLOCKSIZE)
            # os.write may write fewer bytes than requested; count only
            # what actually reached the file
            size -= os.write (fd, zeros [:todo])
    finally:
        os.close (fd)
151 ###############################################################################
153 ###############################################################################
# Template test case for disaster recovery: test_recover_corrupt() writes a
# full backup, damages its first volume through the CORRUPT hook and checks
# what recover_backup() salvages. Subclasses tune the class attributes below.
# NOTE(review): several lines of this class (method headers, some attributes,
# call arguments, the ``try:`` line) are not visible in this view.
155 class RecoverTest (BaseTest):
157 Disaster recovery: restore corrupt backups.
# Expected outcome counters; compared against the recovery result at the end
# of test_recover_corrupt().
162 FAILURES = 0 # files that could not be restored
163 MISMATCHES = 0 # files that were restored but corrupted
# Damage hook, invoked as self.CORRUPT (path, is_compressed, is_encrypted);
# being a plain function stored on the class, it receives the test instance
# as its first (ignored) argument.
164 CORRUPT = corrupt_payload_start
# Optional override for the expected number of missing files.
166 MISSING = None # normally the number of failures
171 Create base test data
# Remember the starting directory so it can be restored after a failed
# restore; note that the restore target is named "source_dir" and the actual
# source tree "source_dir2".
173 self.pwd = os.getcwd()
174 self.dst_path = "source_dir"
175 self.src_path = "%s2" % self.dst_path
# Start from a clean slate, then create the source tree.
178 os.system('rm -rf target_dir source_dir* backup_dir* huge')
179 os.makedirs (self.src_path)
# self.hash maps file name -> value returned by create_file(), which is later
# compared against md5sum() of the restored file.
183 self.hash [f] = self.create_file ("%s/%s"
184 % (self.src_path, f), 5 + i)
189 Remove temporal files created by unit tests and reset globals.
192 os.system("rm -rf source_dir source_dir2 backup_dir*")
195 def test_recover_corrupt (self):
197 Perform various damaging actions that cause unreadable objects.
199 Expects the extraction to fail in normal mode. With disaster recovery,
200 extraction must succeed, and exactly one file must be missing.
# "#" is passed as the mode when no COMPRESSION is configured.
202 mode = self.COMPRESSION or "#"
203 bak_path = "backup_dir"
# backup_file is a template taking the volume number; backup_full is the path
# of volume 0, i.e. the volume that gets corrupted below.
204 backup_file = "the_full_backup_%0.2d.tar"
205 backup_full = ("%s/%s" % (bak_path, backup_file)) % 0
206 index_file = "the_full_index"
208 if self.COMPRESSION is not None:
# Encrypted backups carry the PDT crypto extension on volumes and index.
213 if self.PASSWORD is not None:
214 backup_file = "%s.%s" % (backup_file, deltatar.PDTCRYPT_EXTENSION)
215 backup_full = "%s.%s" % (backup_full, deltatar.PDTCRYPT_EXTENSION)
216 index_file = "%s.%s" % (index_file , deltatar.PDTCRYPT_EXTENSION)
219 # add n files for one nth the volume size each, corrected
220 # for metadata and tar block overhead
221 fsiz = int ( ( TEST_VOLSIZ
222 / (TEST_FILESPERVOL * VOLUME_OVERHEAD))
# No extra files are created when VOLUMES is 1.
224 fcnt = (self.VOLUMES - 1) * TEST_FILESPERVOL
225 for i in range (fcnt):
226 nvol, invol = divmod(i, TEST_FILESPERVOL)
227 f = "dummy_vol_%d_n_%0.2d" % (nvol, invol)
228 self.hash [f] = self.create_file ("%s/%s"
229 % (self.src_path, f),
# Volume naming callback: only the volume number *n* is used.
233 def vname (_x, _y, n, *a, **kwa):
234 return backup_file % n
236 dtar = deltatar.DeltaTar (mode=mode,
238 password=self.PASSWORD,
239 index_name_func=lambda _: index_file,
240 volume_name_func=vname)
242 dtar.create_full_backup \
243 (source_path=self.src_path, backup_path=bak_path,
246 if self.PASSWORD is not None:
247 # ensure all files are at least superficially in PDT format
248 for f in os.listdir (bak_path):
249 assert is_pdt_encrypted ("%s/%s" % (bak_path, f))
251 # first restore must succeed
252 dtar.restore_backup(target_path=self.dst_path,
253 backup_indexes_paths=[
254 "%s/%s" % (bak_path, index_file)
# Every file recorded in self.hash must exist with a matching checksum.
256 for key, value in self.hash.items ():
257 f = "%s/%s" % (self.dst_path, key)
258 assert os.path.exists (f)
259 assert value == self.md5sum (f)
# Drop both the restored tree and the source so the next phases can only
# draw on the backup itself.
260 shutil.rmtree (self.dst_path)
261 shutil.rmtree (self.src_path)
# Damage volume 0 using the configured hook.
263 self.CORRUPT (backup_full,
264 self.COMPRESSION is not None,
265 self.PASSWORD is not None)
267 # normal restore must fail
269 dtar.restore_backup(target_path=self.dst_path,
270 backup_tar_path=backup_full)
# The handlers below distinguish which exception is acceptable for which
# combination of compression and encryption.
271 except tarfile.CompressionError:
272 if self.PASSWORD is not None or self.COMPRESSION is not None:
276 except tarfile.ReadError:
277 # can happen with all three modes
279 except tarfile.DecryptionError:
280 if self.PASSWORD is not None:
285 os.chdir (self.pwd) # not restored due to the error above
286 # but recover will succeed
287 failed = dtar.recover_backup(target_path=self.dst_path,
288 backup_indexes_paths=[
289 "%s/%s" % (bak_path, index_file)
# recover_backup() returns a sized collection of failures.
292 assert len (failed) == self.FAILURES
294 # with one file missing
# Classify each expected file; a file that exists but whose checksum differs
# counts as a mismatch.
# NOTE(review): the branch filling ``missing`` is not visible here --
# presumably files that do not exist after recovery.
297 for key, value in self.hash.items ():
298 kkey = "%s/%s" % (self.dst_path, key)
299 if os.path.exists (kkey):
300 if value != self.md5sum (kkey):
301 mismatch.append (key)
305 # usually, an object whose extraction fails will not be found on
306 # disk afterwards so the number of failures equals that of missing
307 # files. however, some modes will create partial files for objects
308 # spanning multiple volumes that contain the parts whose checksums
310 assert len (missing) == (self.MISSING if self.MISSING is not None
312 assert len (mismatch) == self.MISMATCHES
314 shutil.rmtree (self.dst_path)
# Payload corruption: recovery reports no failed files, but one restored file
# is expected to differ from its recorded checksum.
# NOTE(review): no CORRUPT override is visible, so presumably the inherited
# corrupt_payload_start applies -- confirm.
317 class RecoverCorruptPayloadTestBase (RecoverTest):
320 FAILURES = 0 # tarfile will restore but corrupted, as
321 MISMATCHES = 1 # revealed by the hash
# NOTE(review): the bodies of the two variants below are not visible here.
323 class RecoverCorruptPayloadSingleTest (RecoverCorruptPayloadTestBase):
326 class RecoverCorruptPayloadMultiTest (RecoverCorruptPayloadTestBase):
# Payload corruption, GZ variants.
# NOTE(review): the attribute overrides of these classes are not visible here.
330 class RecoverCorruptPayloadGZTestBase (RecoverTest):
336 class RecoverCorruptPayloadGZSingleTest (RecoverCorruptPayloadGZTestBase):
339 class RecoverCorruptPayloadGZMultiTest (RecoverCorruptPayloadGZTestBase):
# Payload corruption, encrypted variants: setting PASSWORD makes
# test_recover_corrupt() take its "PASSWORD is not None" paths.
# NOTE(review): the remaining attribute overrides are not visible here.
343 class RecoverCorruptPayloadGZAESTestBase (RecoverTest):
345 PASSWORD = TEST_PASSWORD
349 class RecoverCorruptPayloadGZAESSingleTest (RecoverCorruptPayloadGZAESTestBase):
352 class RecoverCorruptPayloadGZAESMultiTest (RecoverCorruptPayloadGZAESTestBase):
# Header corruption: damages a single significant header byte via
# corrupt_header.
# NOTE(review): expected FAILURES/MISMATCHES values are not visible here.
356 class RecoverCorruptHeaderTestBase (RecoverTest):
360 CORRUPT = corrupt_header
363 class RecoverCorruptHeaderSingleTest (RecoverCorruptHeaderTestBase):
366 class RecoverCorruptHeaderMultiTest (RecoverCorruptHeaderTestBase):
# Header corruption via corrupt_header, GZ variants.
# NOTE(review): the remaining attribute overrides are not visible here.
370 class RecoverCorruptHeaderGZTestBase (RecoverTest):
374 CORRUPT = corrupt_header
377 class RecoverCorruptHeaderGZSingleTest (RecoverCorruptHeaderGZTestBase):
380 class RecoverCorruptHeaderGZMultiTest (RecoverCorruptHeaderGZTestBase):
# Header corruption via corrupt_header on a password-protected backup.
# NOTE(review): the remaining attribute overrides are not visible here.
384 class RecoverCorruptHeaderGZAESTestBase (RecoverTest):
386 PASSWORD = TEST_PASSWORD
388 CORRUPT = corrupt_header
391 class RecoverCorruptHeaderGZAESSingleTest (RecoverCorruptHeaderGZAESTestBase):
394 class RecoverCorruptHeaderGZAESMultiTest (RecoverCorruptHeaderGZAESTestBase):
# Whole-header corruption: inverts every bit of the first object header via
# corrupt_entire_header.
# NOTE(review): expected FAILURES/MISMATCHES values are not visible here.
398 class RecoverCorruptEntireHeaderTestBase (RecoverTest):
402 CORRUPT = corrupt_entire_header
405 class RecoverCorruptEntireHeaderSingleTest (RecoverCorruptEntireHeaderTestBase):
408 class RecoverCorruptEntireHeaderMultiTest (RecoverCorruptEntireHeaderTestBase):
# Whole-header corruption via corrupt_entire_header, GZ variants.
# NOTE(review): the remaining attribute overrides are not visible here.
412 class RecoverCorruptEntireHeaderGZTestBase (RecoverTest):
416 CORRUPT = corrupt_entire_header
419 class RecoverCorruptEntireHeaderGZSingleTest (RecoverCorruptEntireHeaderGZTestBase):
422 class RecoverCorruptEntireHeaderGZMultiTest (RecoverCorruptEntireHeaderGZTestBase):
# Whole-header corruption via corrupt_entire_header on a password-protected
# backup.
# NOTE(review): the remaining attribute overrides are not visible here.
426 class RecoverCorruptEntireHeaderGZAESTestBase (RecoverTest):
428 PASSWORD = TEST_PASSWORD
430 CORRUPT = corrupt_entire_header
433 class RecoverCorruptEntireHeaderGZAESSingleTest (RecoverCorruptEntireHeaderGZAESTestBase):
436 class RecoverCorruptEntireHeaderGZAESMultiTest (RecoverCorruptEntireHeaderGZAESTestBase):
# Trailing-data corruption: appends junk to the first volume via
# corrupt_trailing_data.
# NOTE(review): the continuation of the comment below and the expected
# counters are not visible here.
440 class RecoverCorruptTrailingDataTestBase (RecoverTest):
441 # plain Tar is indifferent against trailing data and the results
446 CORRUPT = corrupt_trailing_data
449 class RecoverCorruptTrailingDataSingleTest (RecoverCorruptTrailingDataTestBase):
452 class RecoverCorruptTrailingDataMultiTest (RecoverCorruptTrailingDataTestBase):
453 # the last object in first archive has extra bytes somewhere in the
454 # middle because tar itself performs no data checksumming.
# Trailing-data corruption via corrupt_trailing_data, GZ variants.
# NOTE(review): the expected counters of these classes are not visible here.
459 class RecoverCorruptTrailingDataGZTestBase (RecoverTest):
460 # reading past the final object will cause decompression failure;
461 # all objects except for the last survive unharmed though
465 CORRUPT = corrupt_trailing_data
468 class RecoverCorruptTrailingDataGZSingleTest (RecoverCorruptTrailingDataGZTestBase):
471 class RecoverCorruptTrailingDataGZMultiTest (RecoverCorruptTrailingDataGZTestBase):
473 # the last file of the first volume will only contain the data of the
474 # second part which is contained in the second volume. this happens
475 # because the CRC32 is wrong for the first part so it gets discarded, then
476 # the object is recreated from the first header of the second volume,
477 # containing only the remainder of the data.
# Trailing-data corruption via corrupt_trailing_data on a password-protected
# backup.
# NOTE(review): the remaining attribute overrides are not visible here.
482 class RecoverCorruptTrailingDataGZAESTestBase (RecoverTest):
484 PASSWORD = TEST_PASSWORD
486 CORRUPT = corrupt_trailing_data
489 class RecoverCorruptTrailingDataGZAESSingleTest (RecoverCorruptTrailingDataGZAESTestBase):
492 class RecoverCorruptTrailingDataGZAESMultiTest (RecoverCorruptTrailingDataGZAESTestBase):
# Volume corruption: overwrites the whole first volume with zeros via
# corrupt_volume.
# NOTE(review): the expected counters are not visible here.
496 class RecoverCorruptVolumeBaseTest (RecoverTest):
500 CORRUPT = corrupt_volume
503 class RecoverCorruptVolumeTest (RecoverCorruptVolumeBaseTest):
# Volume corruption via corrupt_volume, GZ variant. Derives from RecoverTest
# directly rather than from RecoverCorruptVolumeBaseTest.
# NOTE(review): the remaining attribute overrides are not visible here.
506 class RecoverCorruptVolumeGZTest (RecoverTest):
510 CORRUPT = corrupt_volume
# Volume corruption via corrupt_volume on a password-protected backup.
# Derives from RecoverTest directly rather than from
# RecoverCorruptVolumeBaseTest.
# NOTE(review): the remaining attribute overrides are not visible here.
513 class RecoverCorruptVolumeGZAESTest (RecoverTest):
515 PASSWORD = TEST_PASSWORD
517 CORRUPT = corrupt_volume