Commit | Line | Data |
---|---|---|
dbd6ff68 PG |
1 | """ |
2 | Intra2net 2017 | |
3 | ||
4 | =============================================================================== | |
5 | test_recover.py – behavior facing file corruption | |
6 | =============================================================================== | |
7 | ||
8 | Corruptors have the signature ``(unittest × string × bool × bool) → void``, | |
9 | where the *string* argument is the name of the file to modify, the *booleans* | |
10 | specialize the operation for compressed and encrypted data. Issues are | |
11 | communicated upward by throwing. | |
12 | ||
13 | - corrupt_header (): | |
14 | Modify the first object header where it hurts. With encryption, the tag | |
15 | is corrupted to cause authentication of the decrypted data to fail. For | |
16 | compressed data, the two byte magic is altered, for uncompressed | |
17 | archives, the tar header checksum field. | |
18 | ||
19 | - corrupt_ctsize (): | |
20 | Modify the *ctsize* field of a PDTCRYPT header. The goal is to have | |
21 | decryption continue past the end of the object, causing data | |
22 | authentication to fail and file reads to be at odds with the offsets in | |
23 | the index. Only applicable to encrypted archives; will raise | |
24 | *UndefinedTest* otherwise. | |
25 | ||
26 | - corrupt_entire_header (): | |
27 | Invert all bits of the first object header (PDTCRYPT, gzip, tar) without | |
28 | affecting the payload. This renders the object unreadable; the file will | |
28 | resemble one with arbitrary leading data but all the remaining object | |
30 | offsets intact, so the contents can still be extracted with index based | |
31 | recovery. | |
32 | ||
33 | - corrupt_payload_start (): | |
34 | For all header variants, skip to the first byte past the header and | |
35 | corrupt it. Encrypted objects will fail to authenticate. Compressed | |
36 | objects will yield a bad CRC32. The Tar layer will take no notice but | |
37 | the extracted object will fail an independent checksum comparison with | |
38 | that of the original file. | |
39 | ||
40 | - corrupt_leading_garbage (): | |
41 | Prepend random data to an otherwise valid file. Creates a situation that | |
42 | index based recovery cannot handle by shifting the offsets of all objects | |
43 | in the file. In rescue mode, these objects must be located and extracted | |
44 | regardless. | |
45 | ||
46 | - corrupt_trailing_data (): | |
47 | Append data to an otherwise valid file. Both the recovery and rescue | |
48 | modes must be able to retrieve all objects from that file. | |
49 | ||
50 | - corrupt_volume (): | |
51 | Zero out an entire backup file. This is interesting for multivolume | |
52 | tests: all files from the affected volume must be missing but objects | |
53 | that span volume bounds will still be partially recoverable. | |
54 | ||
55 | - corrupt_hole (): | |
56 | Remove a region from a file. Following the damaged part, no object can be | |
57 | recovered in index mode, but rescue mode will still find those. The | |
58 | object containing the start of the hole will fail checksum tests because | |
59 | of the missing part and the overlap with the subsequent object. | |
60 | ||
61 | """ | |
62 | ||
fbdc9f4a PG |
63 | import logging |
64 | import os | |
65 | import shutil | |
3692fd82 | 66 | import stat |
b9cf4a0f PG |
67 | import sys |
68 | import unittest | |
fbdc9f4a | 69 | |
2fe5f6e7 PG |
70 | from functools import partial |
71 | ||
fbdc9f4a | 72 | import deltatar.deltatar as deltatar |
3267933a | 73 | import deltatar.crypto as crypto |
203cb25e | 74 | import deltatar.tarfile as tarfile |
fbdc9f4a PG |
75 | |
76 | from . import BaseTest | |
77 | ||
e25f31ac | 78 | TEST_PASSWORD = "test1234" |
85e7013f | 79 | TEST_VOLSIZ = 2 # MB |
e25f31ac | 80 | TEST_FILESPERVOL = 3 |
85e7013f PG |
81 | VOLUME_OVERHEAD = 1.4 # account for tar overhead when fitting files into |
82 | # volumes; this is black magic | |
20e1d773 | 83 | TEST_BLOCKSIZE = 4096 |
96fe6399 PG |
84 | |
85 | ############################################################################### | |
86 | ## helpers ## | |
87 | ############################################################################### | |
88 | ||
3267933a PG |
def flip_bits (fname, off, b=0x01, n=1):
    """
    XOR a run of bytes inside file *fname*, in place.

    Up to *n* bytes are read starting at byte offset *off*; each of them is
    xor'ed with the mask *b* and the result is written back to the same
    offset. If the file holds fewer than *n* bytes at *off*, only those
    present are rewritten.
    """
    # unbuffered so that each call maps to one read / one write on the file
    with open (fname, "r+b", buffering=0) as fp:
        where = fp.seek (off, os.SEEK_SET)
        assert where == off
        flipped = bytes (octet ^ b for octet in fp.read (n))
        # the read advanced the file position; go back before writing
        where = fp.seek (off, os.SEEK_SET)
        assert where == off
        fp.write (flipped)
106 | ||
203cb25e PG |
107 | |
def gz_header_size (fname, off=0):
    """
    Determine the length of the gzip header starting at *off* in file *fname*.

    The header is variable length because it may contain the filename as NUL
    terminated bytes. The scan starts ``tarfile.GZ_HEADER_SIZE`` bytes past
    *off* and stops at the first NUL byte.

    Returns the distance from *off* to that NUL byte (the terminator itself
    is not counted). With the default ``off=0`` this equals the absolute file
    offset of the terminator.

    Raises *ValueError* if the end of the file is reached before a NUL byte
    is found.

    NOTE(review): this presumes the header actually carries a file name
    field; without one the first NUL found would belong to whatever follows.
    """
    # honor the caller supplied start offset instead of discarding it
    pos = off + tarfile.GZ_HEADER_SIZE
    fd = os.open (fname, os.O_RDONLY)

    try:
        where = os.lseek (fd, pos, os.SEEK_SET)
        assert where == pos
        while True:
            byte = os.read (fd, 1) # advances the file position by one
            if not byte:
                raise ValueError ("%s: no NUL terminator in gzip header "
                                  "starting at offset %d" % (fname, off))
            if byte [0] == 0:
                break
            pos += 1
    finally:
        os.close (fd)

    return pos - off
130 | ||
da8996f0 | 131 | |
96fe6399 PG |
def is_pdt_encrypted (fname):
    """
    Returns true if the file contains at least one PDT header plus enough
    space for the object.

    The header is parsed with ``crypto.hdr_read_stream``; its ``ctsize``
    field gives the number of bytes that must follow. Any error while opening
    or parsing the file is deliberately treated as "not PDT encrypted" and
    yields False.
    """
    try:
        with open (fname, "rb") as st:
            hdr = crypto.hdr_read_stream (st)
            siz = hdr ["ctsize"]
            # explicit comparison rather than an assert so that the check
            # survives running the interpreter with -O
            return len (st.read (siz)) == siz
    except Exception:
        return False
145 | ||
146 | ||
3692fd82 PG |
147 | ############################################################################### |
148 | ## corruption simulators ## | |
149 | ############################################################################### | |
150 | ||
0c8baf2b PG |
class UndefinedTest (Exception):
    """
    Raised by a corruptor that has no meaningful operation for the requested
    combination of parameters.
    """
153 | ||
00b8c150 PG |
def corrupt_header (_, fname, compress, encrypt):
    """
    Flip the lowest bit of one significant byte in the leading object header
    of *fname*. Which byte depends on the format:

    - encrypted: the byte at ``crypto.HDR_OFF_TAG + 1`` (inside the GCM tag);
    - compressed: the byte at offset 1 (inside the magic);
    - plain tar: a byte inside the header checksum field.

    The first parameter is unused.
    """
    if encrypt is True:
        target = crypto.HDR_OFF_TAG + 1
    elif compress is True:
        target = 1
    else:
        # Field widths preceding the checksum, from tar(5):
        #
        #   struct header_gnu_tar {
        #       char name[100];
        #       char mode[8];
        #       char uid[8];
        #       char gid[8];
        #       char size[12];
        #       char mtime[12];
        #       char checksum[8];
        #       …
        target = 100 + 8 + 8 + 8 + 12 + 12 + 1
    flip_bits (fname, target)
174 | ||
175 | ||
0c8baf2b PG |
def corrupt_ctsize (_, fname, compress, encrypt):
    """
    Tamper with the ciphertext size recorded in the first PDTCRYPT header so
    that the object's apparent payload leaks into the objects following it.

    Only defined for encrypted archives; raises *UndefinedTest* otherwise.
    """
    if encrypt is not True:
        raise UndefinedTest ("corrupt_ctsize %s %s %s" % (fname, compress, encrypt))
    # Flip the lowest bit of the second byte of the size field. According to
    # the original author this yields a ciphertext size of 422 for the test
    # data, reaching over the next object into the third one.
    return flip_bits (fname, crypto.HDR_OFF_CTSIZE + 1, b=0x01)
187 | ||
188 | ||
da8996f0 PG |
def corrupt_entire_header (_, fname, compress, encrypt):
    """
    Invert every bit of the first object header in *fname*.

    The number of bytes inverted is the PDTCRYPT header size for encrypted
    archives, the result of ``gz_header_size`` for compressed ones and one
    tar block otherwise.
    """
    if encrypt is True:
        length = crypto.PDTCRYPT_HDR_SIZE
    elif compress is True:
        length = gz_header_size (fname)
    else:
        length = tarfile.BLOCKSIZE
    flip_bits (fname, 0, 0xff, length)
199 | ||
200 | ||
00b8c150 PG |
def corrupt_payload_start (_, fname, compress, encrypt):
    """
    Flip the lowest bit of a byte right behind the first object header.

    The byte hit lies one past the header length, where the header length is
    the PDTCRYPT header size (encrypted), the result of ``gz_header_size``
    (compressed) or one tar block (plain).
    """
    if encrypt is True:
        header_len = crypto.PDTCRYPT_HDR_SIZE
    elif compress is True:
        header_len = gz_header_size (fname)
    else:
        header_len = tarfile.BLOCKSIZE
    flip_bits (fname, header_len + 1)
211 | ||
212 | ||
afb2d647 PG |
def corrupt_leading_garbage (_, fname, compress, encrypt):
    """
    Prepend junk to file.

    A new anonymous file is created in the directory of *fname* (via
    ``O_TMPFILE``), filled with 42 random bytes followed by the complete
    contents of *fname*, and then linked into the place of the original.
    The first, *compress* and *encrypt* parameters are unused.
    """
    aname = os.path.abspath (fname)
    junk = os.urandom (42)
    infd = os.open (fname, os.O_RDONLY)

    try:
        size = os.fstat (infd).st_size
        outfd = os.open (os.path.dirname (aname), os.O_WRONLY | os.O_TMPFILE,
                         stat.S_IRUSR | stat.S_IWUSR)
        try:
            # write new file with garbage prepended
            os.write (outfd, junk) # junk first
            copied = 0
            # copy until EOF; the junk must not count toward the amount of
            # source data handled or the tail of the file could get lost
            while True:
                data = os.read (infd, TEST_BLOCKSIZE)
                if not data:
                    break
                os.write (outfd, data)
                copied += len (data)

            written = os.lseek (outfd, 0, os.SEEK_CUR)
            assert copied == size
            assert written == len (junk) + size

            # drop the old file, then install the new one under its name
            os.unlink (fname)
            path = "/proc/self/fd/%d" % outfd
            # NOTE(review): *path* is absolute so the value of src_dir_fd is
            # not used for lookup; passing it appears to be what makes
            # CPython call linkat(2) so follow_symlinks takes effect --
            # confirm before removing.
            os.link (path, aname, src_dir_fd=0, follow_symlinks=True)
        finally:
            os.close (outfd)
    finally:
        os.close (infd)
244 | ||
245 | ||
517d35b7 PG |
def corrupt_trailing_data (_, fname, compress, encrypt):
    """
    Append random data to file.

    Adds 42 random bytes to the end of the existing file *fname*; the file
    is not created if it is missing. The first, *compress* and *encrypt*
    parameters are unused.
    """
    junk = os.urandom (42)
    fd = os.open (fname, os.O_WRONLY | os.O_APPEND)
    try:
        os.write (fd, junk)
    finally:
        # release the descriptor even if the write fails
        os.close (fd)
254 | ||
00b8c150 | 255 | |
20e1d773 PG |
def corrupt_volume (_, fname, compress, encrypt):
    """
    Zero out an entire volume.

    Overwrites every byte of *fname* with zeros, leaving the file size
    unchanged. The first, *compress* and *encrypt* parameters are unused.
    """
    zeros = bytes (TEST_BLOCKSIZE)
    fd = os.open (fname, os.O_WRONLY)
    try:
        # fstat leaves the file position at the start, so no rewind (and no
        # side effect hidden inside an assert) is needed
        size = os.fstat (fd).st_size
        while size > 0:
            todo = min (size, TEST_BLOCKSIZE)
            os.write (fd, zeros [:todo])
            size -= todo
    finally:
        os.close (fd)
269 | ||
270 | ||
3692fd82 PG |
def corrupt_hole (_, fname, compress, encrypt):
    """
    Cut file in three pieces, reassemble without the middle one.

    The file is copied block by block (``TEST_BLOCKSIZE`` bytes each) into an
    anonymous file; blocks whose start offset lies between one third and two
    thirds of the file size are left out. The result is linked into the place
    of the original. Requires the file to be larger than three blocks. The
    first, *compress* and *encrypt* parameters are unused.
    """
    aname = os.path.abspath (fname)
    infd = os.open (fname, os.O_RDONLY)

    try:
        # fstat does not move the file position, so reading starts at zero
        size = os.fstat (infd).st_size
        assert size > 3 * TEST_BLOCKSIZE
        hole = (size / 3, size * 2 / 3)
        outfd = os.open (os.path.dirname (aname), os.O_WRONLY | os.O_TMPFILE,
                         stat.S_IRUSR | stat.S_IWUSR)
        try:
            done = 0
            while done < size:
                data = os.read (infd, TEST_BLOCKSIZE)
                if not data:
                    break # file shrank underneath us; do not spin
                if done < hole [0] or hole [1] < done:
                    # only copy from outside hole
                    os.write (outfd, data)
                done += len (data)

            os.unlink (fname)
            path = "/proc/self/fd/%d" % outfd
            # NOTE(review): *path* is absolute so the value of src_dir_fd is
            # not used for lookup; passing it appears to be what makes
            # CPython call linkat(2) so follow_symlinks takes effect --
            # confirm before removing.
            os.link (path, aname, src_dir_fd=0, follow_symlinks=True)
        finally:
            os.close (outfd)
    finally:
        os.close (infd)
298 | ||
2fe5f6e7 PG |
def immaculate (_, _fname, _compress, _encrypt):
    """
    Corruptor that leaves the file untouched; all arguments are ignored.
    """
    return None
3692fd82 | 304 | |
96fe6399 PG |
305 | ############################################################################### |
306 | ## tests ## | |
307 | ############################################################################### | |
203cb25e | 308 | |
class DefectiveTest (BaseTest):
    """
    Disaster recovery: restore corrupt backups.

    Shared fixture for the tests in this module. Subclasses describe a
    scenario by overriding the class attributes below.
    """

    COMPRESSION = None # deltatar mode string such as "#gz"; None selects "#"
    PASSWORD = None # encryption password, None for unencrypted backups
    FAILURES = 0 # files that could not be restored
    MISMATCHES = 0 # files that were restored but corrupted
    # Corruptor applied to the first volume. Being a plain function stored on
    # the class it is invoked as a method, so its first parameter receives
    # the test instance.
    CORRUPT = corrupt_payload_start
    VOLUMES = 1 # number of volumes the backup should span
    MISSING = None # normally the number of failures


    def setUp(self):
        '''
        Create base test data: five files ``dummy_0`` … ``dummy_4`` in the
        source directory. The value returned by ``create_file`` for each is
        kept in ``self.hash`` under the file name.
        '''
        self.pwd = os.getcwd()
        self.dst_path = "source_dir"
        self.src_path = "%s2" % self.dst_path
        self.hash = dict()

        # start from a clean slate
        os.system('rm -rf target_dir source_dir* backup_dir* huge')
        os.makedirs (self.src_path)

        for i in range (5):
            f = "dummy_%d" % i
            self.hash [f] = self.create_file ("%s/%s"
                                              % (self.src_path, f), 5 + i)


    def tearDown(self):
        '''
        Return to the initial working directory and remove temporary files
        created by unit tests.
        '''
        os.chdir(self.pwd)
        os.system("rm -rf source_dir source_dir2 backup_dir*")


    @staticmethod
    def default_volume_name (backup_file, _x, _y, n, *a, **kwa):
        """
        Volume naming callback: substitute the volume number *n* into the
        format string *backup_file*. All other arguments are ignored.
        """
        return backup_file % n

    def gen_file_names (self, comp, pw):
        """
        Derive the file names of a backup set.

        *comp* and *pw* are the compression mode and password of the backup;
        if not None, a ``.gz`` suffix resp. the PDTCRYPT extension is
        appended to the names.

        Returns a tuple ``(bak_path, backup_file, backup_full, index_file)``:
        the backup directory, the volume name format string, the path of
        volume zero and the index file name.
        """
        bak_path = "backup_dir"
        backup_file = "the_full_backup_%0.2d.tar"
        backup_full = ("%s/%s" % (bak_path, backup_file)) % 0
        index_file = "the_full_index"

        # use the arguments rather than the class attributes so the method
        # does what its signature promises
        if comp is not None:
            backup_file += ".gz"
            backup_full += ".gz"
            index_file += ".gz"

        if pw is not None:
            backup_file = "%s.%s" % (backup_file, deltatar.PDTCRYPT_EXTENSION)
            backup_full = "%s.%s" % (backup_full, deltatar.PDTCRYPT_EXTENSION)
            index_file = "%s.%s" % (index_file , deltatar.PDTCRYPT_EXTENSION)

        return bak_path, backup_file, backup_full, index_file


    def gen_multivol (self, nvol):
        """
        Populate the source directory with random files intended to make the
        backup span *nvol* volumes: ``TEST_FILESPERVOL`` files for each
        volume but the last. The value returned by ``create_file`` for each
        file is added to ``self.hash``.
        """
        # add n files for one nth the volume size each, corrected
        # for metadata and tar block overhead
        fsiz = int ( ( TEST_VOLSIZ
                     / (TEST_FILESPERVOL * VOLUME_OVERHEAD))
                   * 1024 * 1024)
        fcnt = (nvol - 1) * TEST_FILESPERVOL
        for i in range (fcnt):
            vol, invol = divmod(i, TEST_FILESPERVOL)
            f = "dummy_vol_%d_n_%0.2d" % (vol, invol)
            self.hash [f] = self.create_file ("%s/%s"
                                              % (self.src_path, f),
                                              fsiz,
                                              random=True)
386 | ||
387 | ||
2fe5f6e7 PG |
class RecoverTest (DefectiveTest):
    """
    Recover: restore corrupt backups from index file information.
    """

    def test_recover_corrupt (self):
        """
        Perform various damaging actions that cause unreadable objects.

        Sequence: create a full backup, verify that it restores cleanly,
        damage the first volume with ``self.CORRUPT``, attempt a normal
        restore (whose read, compression and decryption errors are
        tolerated where they fit the mode), then run index based recovery.

        The number of objects reported as failed must equal ``FAILURES``,
        the number of files absent afterwards ``MISSING`` (or ``FAILURES``
        if that is None) and the number of files whose checksum differs
        ``MISMATCHES``.
        """
        mode = self.COMPRESSION or "#"
        bak_path, backup_file, backup_full, index_file = \
            self.gen_file_names (self.COMPRESSION, self.PASSWORD)

        if self.VOLUMES > 1:
            self.gen_multivol (self.VOLUMES)

        vname = partial (self.default_volume_name, backup_file)
        dtar = deltatar.DeltaTar (mode=mode,
                                  logger=None,
                                  password=self.PASSWORD,
                                  index_name_func=lambda _: index_file,
                                  volume_name_func=vname)

        dtar.create_full_backup \
            (source_path=self.src_path, backup_path=bak_path,
             max_volume_size=1)

        if self.PASSWORD is not None:
            # ensure all files are at least superficially in PDT format
            for f in os.listdir (bak_path):
                assert is_pdt_encrypted ("%s/%s" % (bak_path, f))

        # first restore must succeed
        dtar.restore_backup(target_path=self.dst_path,
                            backup_indexes_paths=[
                                "%s/%s" % (bak_path, index_file)
                            ])
        for key, value in self.hash.items ():
            f = "%s/%s" % (self.dst_path, key)
            assert os.path.exists (f)
            assert value == self.md5sum (f)
        # from here on only the backup itself remains on disk
        shutil.rmtree (self.dst_path)
        shutil.rmtree (self.src_path)

        # damage the first volume; the flags tell the corruptor which header
        # format to expect
        self.CORRUPT (backup_full,
                      self.COMPRESSION is not None,
                      self.PASSWORD is not None)

        # normal restore is expected to fail for most scenarios; an error is
        # only accepted if it is plausible for the mode under test
        try:
            dtar.restore_backup(target_path=self.dst_path,
                                backup_tar_path=backup_full)
        except tarfile.CompressionError:
            if self.PASSWORD is not None or self.COMPRESSION is not None:
                pass
            else:
                raise
        except tarfile.ReadError:
            # can happen with all three modes
            pass
        except tarfile.DecryptionError:
            if self.PASSWORD is not None:
                pass
            else:
                raise

        os.chdir (self.pwd) # not restored due to the error above
        # but recover will succeed
        failed = dtar.recover_backup(target_path=self.dst_path,
                                     backup_indexes_paths=[
                                         "%s/%s" % (bak_path, index_file)
                                     ])

        assert len (failed) == self.FAILURES

        # sort the expected files into absent ones and ones with bad content
        missing = []
        mismatch = []
        for key, value in self.hash.items ():
            kkey = "%s/%s" % (self.dst_path, key)
            if os.path.exists (kkey):
                if value != self.md5sum (kkey):
                    mismatch.append (key)
            else:
                missing.append (key)

        # usually, an object whose extraction fails will not be found on
        # disk afterwards so the number of failures equals that of missing
        # files. however, some modes will create partial files for objects
        # spanning multiple volumes that contain the parts whose checksums
        # were valid.
        assert len (missing) == (self.MISSING if self.MISSING is not None
                                 else self.FAILURES)
        assert len (mismatch) == self.MISMATCHES

        shutil.rmtree (self.dst_path)
487 | ||
488 | ||
0c6682ce PG |
class RescueTest (DefectiveTest):
    """
    Rescue: restore corrupt backups from backup set that is damaged to a degree
    that the index file is worthless.
    """

    def test_rescue_corrupt (self):
        """
        Perform various damaging actions that cause unreadable objects, then
        attempt to extract objects regardless.

        Sequence: create a full backup, verify that it restores cleanly,
        damage the first volume with ``self.CORRUPT``, attempt a normal
        restore (tolerating the errors that fit the mode), then run
        ``rescue_backup`` on the first volume without consulting the index.

        The number of objects reported as failed must equal ``FAILURES``,
        the number of files absent afterwards ``MISSING`` (or ``FAILURES``
        if that is None) and the number of files whose checksum differs
        ``MISMATCHES``.
        """
        mode = self.COMPRESSION or "#"
        bak_path, backup_file, backup_full, index_file = \
            self.gen_file_names (self.COMPRESSION, self.PASSWORD)

        if self.VOLUMES > 1:
            self.gen_multivol (self.VOLUMES)

        vname = partial (self.default_volume_name, backup_file)
        dtar = deltatar.DeltaTar (mode=mode,
                                  logger=None,
                                  password=self.PASSWORD,
                                  index_name_func=lambda _: index_file,
                                  volume_name_func=vname)

        dtar.create_full_backup \
            (source_path=self.src_path, backup_path=bak_path,
             max_volume_size=1)

        if self.PASSWORD is not None:
            # ensure all files are at least superficially in PDT format
            for f in os.listdir (bak_path):
                assert is_pdt_encrypted ("%s/%s" % (bak_path, f))

        # first restore must succeed
        dtar.restore_backup(target_path=self.dst_path,
                            backup_indexes_paths=[
                                "%s/%s" % (bak_path, index_file)
                            ])
        for key, value in self.hash.items ():
            f = "%s/%s" % (self.dst_path, key)
            assert os.path.exists (f)
            assert value == self.md5sum (f)
        # from here on only the backup itself remains on disk
        shutil.rmtree (self.dst_path)
        shutil.rmtree (self.src_path)

        # damage the first volume; the flags tell the corruptor which header
        # format to expect
        self.CORRUPT (backup_full,
                      self.COMPRESSION is not None,
                      self.PASSWORD is not None)

        # normal restore is expected to fail for most scenarios; an error is
        # only accepted if it is plausible for the mode under test
        try:
            dtar.restore_backup(target_path=self.dst_path,
                                backup_tar_path=backup_full)
        except tarfile.CompressionError:
            if self.PASSWORD is not None or self.COMPRESSION is not None:
                pass
            else:
                raise
        except tarfile.ReadError:
            # can happen with all three modes
            pass
        except tarfile.DecryptionError:
            if self.PASSWORD is not None:
                pass
            else:
                raise

        os.chdir (self.pwd) # not restored due to the error above
        # rescue works from the volume itself, not from the index
        failed = dtar.rescue_backup(target_path=self.dst_path,
                                    backup_tar_path=backup_full)
        # sort the expected files into absent ones and ones with bad content
        missing = []
        mismatch = []
        for key, value in self.hash.items ():
            kkey = "%s/%s" % (self.dst_path, key)
            if os.path.exists (kkey):
                if value != self.md5sum (kkey):
                    mismatch.append (key)
            else:
                missing.append (key)

        assert len (failed) == self.FAILURES
        assert len (missing) == (self.MISSING if self.MISSING is not None
                                 else self.FAILURES)
        assert len (mismatch) == self.MISMATCHES

        shutil.rmtree (self.dst_path)
578 | ||
579 | ||
2fe5f6e7 PG |
class GenIndexTest (DefectiveTest):
    """
    Reconstruct an index from the volumes of a backup using tarfile.
    """

    def test_gen_index (self):
        """
        Back up the test data, keep the volumes intact and let
        ``tarfile.gen_rescue_index`` derive an index from them. The index
        must hold one entry per source file plus one extra entry for every
        volume after the first.
        """
        mode = self.COMPRESSION or "#"
        names = self.gen_file_names (self.COMPRESSION, self.PASSWORD)
        bak_path, backup_file, backup_full, index_file = names

        if self.VOLUMES > 1:
            self.gen_multivol (self.VOLUMES)

        vname = partial (self.default_volume_name, backup_file)
        dtar = deltatar.DeltaTar (mode=mode,
                                  logger=None,
                                  password=self.PASSWORD,
                                  index_name_func=lambda _: index_file,
                                  volume_name_func=vname)

        dtar.create_full_backup (source_path=self.src_path,
                                 backup_path=bak_path,
                                 max_volume_size=1)

        def gen_volume_name (nvol):
            # path of volume number *nvol* inside the backup directory
            volume = vname (backup_full, True, nvol)
            return os.path.join (bak_path, volume)

        psidx = tarfile.gen_rescue_index (gen_volume_name,
                                          mode,
                                          password=self.PASSWORD)

        # objects spanning volumes are treated as separate entries in the
        # index, hence the correction by the number of volume boundaries
        entries = len (psidx)
        assert entries - self.VOLUMES + 1 == len (self.hash)
2fe5f6e7 PG |
617 | |
618 | ||
619 | ############################################################################### | |
620 | # recover |
621 | ############################################################################### | |
622 | ||
class RecoverCorruptPayloadTestBase (RecoverTest):
    """Recover, plain tar, inherited (payload start) corruptor."""
    PASSWORD = None
    COMPRESSION = None
    # tarfile restores the damaged file without complaint; the corruption is
    # only revealed by the hash comparison
    MISMATCHES = 1
    FAILURES = 0
00b8c150 | 628 | |
e25f31ac PG |
class RecoverCorruptPayloadSingleTest (RecoverCorruptPayloadTestBase):
    """Payload corruption, plain tar, single volume."""
    VOLUMES = 1
631 | ||
class RecoverCorruptPayloadMultiTest (RecoverCorruptPayloadTestBase):
    """Payload corruption, plain tar, three volumes."""
    VOLUMES = 3
634 | ||
00b8c150 | 635 | |
class RecoverCorruptPayloadGZTestBase (RecoverTest):
    """Recover, gzip compressed, inherited (payload start) corruptor."""
    PASSWORD = None
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 1
00b8c150 | 641 | |
e25f31ac PG |
class RecoverCorruptPayloadGZSingleTest (RecoverCorruptPayloadGZTestBase):
    """Payload corruption, gzip, single volume."""
    VOLUMES = 1
00b8c150 | 644 | |
e25f31ac PG |
class RecoverCorruptPayloadGZMultiTest (RecoverCorruptPayloadGZTestBase):
    """Payload corruption, gzip, three volumes."""
    VOLUMES = 3
647 | ||
648 | ||
class RecoverCorruptPayloadGZAESTestBase (RecoverTest):
    """Recover, gzip plus encryption, inherited (payload start) corruptor."""
    PASSWORD = TEST_PASSWORD
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 1
00b8c150 | 654 | |
e25f31ac PG |
class RecoverCorruptPayloadGZAESSingleTest (RecoverCorruptPayloadGZAESTestBase):
    """Payload corruption, gzip plus encryption, single volume."""
    VOLUMES = 1
657 | ||
class RecoverCorruptPayloadGZAESMultiTest (RecoverCorruptPayloadGZAESTestBase):
    """Payload corruption, gzip plus encryption, three volumes."""
    VOLUMES = 3
00b8c150 | 660 | |
e25f31ac PG |
661 | |
class RecoverCorruptHeaderTestBase (RecoverTest):
    """Recover, plain tar, one significant header byte damaged."""
    CORRUPT = corrupt_header
    PASSWORD = None
    COMPRESSION = None
    MISMATCHES = 0
    FAILURES = 1
0349168a | 668 | |
e25f31ac PG |
class RecoverCorruptHeaderSingleTest (RecoverCorruptHeaderTestBase):
    """Header corruption, plain tar, single volume."""
    VOLUMES = 1
671 | ||
class RecoverCorruptHeaderMultiTest (RecoverCorruptHeaderTestBase):
    """Header corruption, plain tar, three volumes."""
    VOLUMES = 3
674 | ||
0349168a | 675 | |
class RecoverCorruptHeaderGZTestBase (RecoverTest):
    """Recover, gzip compressed, one significant header byte damaged."""
    CORRUPT = corrupt_header
    PASSWORD = None
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 1
96fe6399 | 682 | |
e25f31ac PG |
class RecoverCorruptHeaderGZSingleTest (RecoverCorruptHeaderGZTestBase):
    """Header corruption, gzip, single volume."""
    VOLUMES = 1
3267933a | 685 | |
e25f31ac PG |
class RecoverCorruptHeaderGZMultiTest (RecoverCorruptHeaderGZTestBase):
    """Header corruption, gzip, three volumes."""
    VOLUMES = 3
688 | ||
689 | ||
class RecoverCorruptHeaderGZAESTestBase (RecoverTest):
    """Recover, gzip plus encryption, one significant header byte damaged."""
    CORRUPT = corrupt_header
    PASSWORD = TEST_PASSWORD
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 1
fbdc9f4a | 696 | |
e25f31ac PG |
class RecoverCorruptHeaderGZAESSingleTest (RecoverCorruptHeaderGZAESTestBase):
    """Header corruption, gzip plus encryption, single volume."""
    VOLUMES = 1
699 | ||
class RecoverCorruptHeaderGZAESMultiTest (RecoverCorruptHeaderGZAESTestBase):
    """Header corruption, gzip plus encryption, three volumes."""
    VOLUMES = 3
da8996f0 | 702 | |
e25f31ac PG |
703 | |
class RecoverCorruptEntireHeaderTestBase (RecoverTest):
    """Recover, plain tar, first object header inverted entirely."""
    CORRUPT = corrupt_entire_header
    PASSWORD = None
    COMPRESSION = None
    MISMATCHES = 0
    FAILURES = 1
da8996f0 | 710 | |
e25f31ac PG |
class RecoverCorruptEntireHeaderSingleTest (RecoverCorruptEntireHeaderTestBase):
    """Inverted header, plain tar, single volume."""
    VOLUMES = 1
713 | ||
class RecoverCorruptEntireHeaderMultiTest (RecoverCorruptEntireHeaderTestBase):
    """Inverted header, plain tar, three volumes."""
    VOLUMES = 3
716 | ||
da8996f0 | 717 | |
class RecoverCorruptEntireHeaderGZTestBase (RecoverTest):
    """Recover, gzip compressed, first object header inverted entirely."""
    CORRUPT = corrupt_entire_header
    PASSWORD = None
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 1
da8996f0 | 724 | |
e25f31ac PG |
class RecoverCorruptEntireHeaderGZSingleTest (RecoverCorruptEntireHeaderGZTestBase):
    """Inverted header, gzip, single volume."""
    VOLUMES = 1
da8996f0 | 727 | |
e25f31ac PG |
class RecoverCorruptEntireHeaderGZMultiTest (RecoverCorruptEntireHeaderGZTestBase):
    """Inverted header, gzip, three volumes."""
    VOLUMES = 3
730 | ||
731 | ||
class RecoverCorruptEntireHeaderGZAESTestBase (RecoverTest):
    """Recover, gzip plus encryption, first object header inverted entirely."""
    CORRUPT = corrupt_entire_header
    PASSWORD = TEST_PASSWORD
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 1
da8996f0 | 738 | |
e25f31ac PG |
class RecoverCorruptEntireHeaderGZAESSingleTest (RecoverCorruptEntireHeaderGZAESTestBase):
    """Inverted header, gzip plus encryption, single volume."""
    VOLUMES = 1
741 | ||
class RecoverCorruptEntireHeaderGZAESMultiTest (RecoverCorruptEntireHeaderGZAESTestBase):
    """Inverted header, gzip plus encryption, three volumes."""
    VOLUMES = 3
517d35b7 | 744 | |
e25f31ac PG |
745 | |
class RecoverCorruptTrailingDataTestBase (RecoverTest):
    """Recover, plain tar, random bytes appended to the first volume."""
    CORRUPT = corrupt_trailing_data
    PASSWORD = None
    COMPRESSION = None
    # plain tar is indifferent to trailing data and the results are
    # consistent
    MISMATCHES = 0
    FAILURES = 0
754 | ||
e25f31ac PG |
class RecoverCorruptTrailingDataSingleTest (RecoverCorruptTrailingDataTestBase):
    """Trailing data, plain tar, single volume."""
    VOLUMES = 1
757 | ||
class RecoverCorruptTrailingDataMultiTest (RecoverCorruptTrailingDataTestBase):
    """Trailing data, plain tar, three volumes."""
    VOLUMES = 3
    # tar itself performs no data checksumming, so the last object of the
    # first archive ends up with the extra bytes somewhere in its middle
    MISMATCHES = 1
763 | ||
517d35b7 | 764 | |
class RecoverCorruptTrailingDataGZTestBase (RecoverTest):
    """Recover, gzip compressed, random bytes appended to the first volume."""
    CORRUPT = corrupt_trailing_data
    PASSWORD = None
    COMPRESSION = "#gz"
    MISMATCHES = 0
    # reading past the final object causes a decompression failure; all
    # objects except for the last survive unharmed though
    FAILURES = 1
773 | ||
e25f31ac PG |
class RecoverCorruptTrailingDataGZSingleTest (RecoverCorruptTrailingDataGZTestBase):
    """Trailing data, gzip, single volume."""
    VOLUMES = 1
517d35b7 | 776 | |
e25f31ac PG |
class RecoverCorruptTrailingDataGZMultiTest (RecoverCorruptTrailingDataGZTestBase):
    """Trailing data, gzip, three volumes."""
    # The last file of the first volume will only contain the data of its
    # second part, which lives in the second volume: the CRC32 of the first
    # part is wrong so that part gets discarded, then the object is recreated
    # from the first header of the second volume, which only carries the
    # remainder of the data. Hence nothing is missing but one file differs.
    MISSING = 0
    MISMATCHES = 1
    VOLUMES = 3
e25f31ac PG |
786 | |
787 | ||
class RecoverCorruptTrailingDataGZAESTestBase (RecoverTest):
    """Recover, gzip plus encryption, random bytes appended to first volume."""
    CORRUPT = corrupt_trailing_data
    PASSWORD = TEST_PASSWORD
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 0
794 | ||
e25f31ac PG |
class RecoverCorruptTrailingDataGZAESSingleTest (RecoverCorruptTrailingDataGZAESTestBase):
    """Trailing data, gzip plus encryption, single volume."""
    VOLUMES = 1
797 | ||
class RecoverCorruptTrailingDataGZAESMultiTest (RecoverCorruptTrailingDataGZAESTestBase):
    """Trailing data, gzip plus encryption, three volumes."""
    VOLUMES = 3
517d35b7 | 800 | |
20e1d773 PG |
801 | |
class RecoverCorruptVolumeBaseTest (RecoverTest):
    """Recover, plain tar, first of three volumes zeroed out."""
    CORRUPT = corrupt_volume
    VOLUMES = 3
    PASSWORD = None
    COMPRESSION = None
    FAILURES = 8
808 | ||
class RecoverCorruptVolumeTest (RecoverCorruptVolumeBaseTest):
    """Zeroed volume, configuration of the base class unchanged."""
    pass
811 | ||
3692fd82 PG |
class RecoverCorruptVolumeGZTest (RecoverCorruptVolumeBaseTest):
    """Zeroed volume, gzip compressed."""
    COMPRESSION = "#gz"
814 | ||
815 | class RecoverCorruptVolumeGZAESTest (RecoverCorruptVolumeBaseTest): | |
20e1d773 | 816 | COMPRESSION = "#gz" |
3692fd82 PG |
817 | PASSWORD = TEST_PASSWORD |
818 | ||
819 | ||
b9cf4a0f | 820 | @unittest.skipIf(sys.version_info < (3, 4), "requires recent os library") |
3692fd82 PG |
821 | class RecoverCorruptHoleBaseTest (RecoverTest): |
822 | """ | |
823 | Cut bytes from the middle of a volume. | |
824 | ||
825 | Index-based recovery works only up to the hole. | |
826 | """ | |
827 | COMPRESSION = None | |
20e1d773 | 828 | PASSWORD = None |
3692fd82 PG |
829 | FAILURES = 3 |
830 | CORRUPT = corrupt_hole | |
831 | VOLUMES = 2 # request two vols to swell up the first one | |
832 | MISMATCHES = 1 | |
833 | ||
b9cf4a0f | 834 | @unittest.skipIf(sys.version_info < (3, 4), "requires recent os library") |
3692fd82 PG |
835 | class RecoverCorruptHoleTest (RecoverCorruptHoleBaseTest): |
836 | FAILURES = 2 | |
837 | ||
b9cf4a0f | 838 | @unittest.skipIf(sys.version_info < (3, 4), "requires recent os library") |
3692fd82 PG |
839 | class RecoverCorruptHoleGZTest (RecoverCorruptHoleBaseTest): |
840 | COMPRESSION = "#gz" | |
841 | MISSING = 2 | |
20e1d773 | 842 | |
b9cf4a0f | 843 | @unittest.skipIf(sys.version_info < (3, 4), "requires recent os library") |
3692fd82 | 844 | class RecoverCorruptHoleGZAESTest (RecoverCorruptHoleBaseTest): |
20e1d773 PG |
845 | COMPRESSION = "#gz" |
846 | PASSWORD = TEST_PASSWORD | |
3692fd82 | 847 | MISSING = 2 |
20e1d773 | 848 | |
2fe5f6e7 PG |
849 | ############################################################################### |
850 | # rescue | |
851 | ############################################################################### | |
852 | ||
b9cf4a0f | 853 | @unittest.skipIf(sys.version_info < (3, 4), "requires recent os library") |
2fe5f6e7 PG |
854 | class RescueCorruptHoleBaseTest (RescueTest): |
855 | """ | |
856 | Cut bytes from the middle of a volume. | |
857 | """ | |
858 | COMPRESSION = None | |
859 | PASSWORD = None | |
79bc14cf | 860 | FAILURES = 0 |
2fe5f6e7 PG |
861 | CORRUPT = corrupt_hole |
862 | VOLUMES = 2 # request two vols to swell up the first one | |
79bc14cf PG |
863 | MISMATCHES = 2 # intersected by hole |
864 | MISSING = 1 # excised by hole | |
2fe5f6e7 | 865 | |
b9cf4a0f | 866 | @unittest.skipIf(sys.version_info < (3, 4), "requires recent os library") |
2fe5f6e7 | 867 | class RescueCorruptHoleTest (RescueCorruptHoleBaseTest): |
79bc14cf | 868 | pass |
2fe5f6e7 | 869 | |
b9cf4a0f | 870 | @unittest.skipIf(sys.version_info < (3, 4), "requires recent os library") |
2fe5f6e7 PG |
871 | class RescueCorruptHoleGZTest (RescueCorruptHoleBaseTest): |
872 | COMPRESSION = "#gz" | |
79bc14cf PG |
873 | # the decompressor explodes in our face processing the first dummy, nothing |
874 | # we can do to recover | |
875 | FAILURES = 1 | |
2fe5f6e7 | 876 | |
b9cf4a0f | 877 | @unittest.skipIf(sys.version_info < (3, 4), "requires recent os library") |
2fe5f6e7 PG |
878 | class RescueCorruptHoleGZAESTest (RescueCorruptHoleBaseTest): |
879 | COMPRESSION = "#gz" | |
880 | PASSWORD = TEST_PASSWORD | |
79bc14cf PG |
881 | # again, ignoring the crypto errors yields a bad zlib stream causing the |
882 | # decompressor to abort where the hole begins; the file is extracted up | |
883 | # to this point though | |
884 | FAILURES = 1 | |
2fe5f6e7 | 885 | |
0c8baf2b | 886 | |
afb2d647 | 887 | class RescueCorruptHeaderCTSizeGZAESTest (RescueTest): |
0c8baf2b PG |
888 | COMPRESSION = "#gz" |
889 | PASSWORD = TEST_PASSWORD | |
890 | FAILURES = 0 | |
891 | CORRUPT = corrupt_ctsize | |
892 | MISMATCHES = 0 | |
893 | ||
894 | ||
b9cf4a0f | 895 | @unittest.skipIf(sys.version_info < (3, 4), "requires recent os library") |
afb2d647 PG |
896 | class RescueCorruptLeadingGarbageTestBase (RescueTest): |
897 | # plain Tar is indifferent against trailing data and the results | |
898 | # are consistent | |
899 | COMPRESSION = None | |
900 | PASSWORD = None | |
901 | FAILURES = 0 | |
902 | CORRUPT = corrupt_leading_garbage | |
903 | MISMATCHES = 0 | |
904 | ||
b9cf4a0f | 905 | @unittest.skipIf(sys.version_info < (3, 4), "requires recent os library") |
afb2d647 PG |
906 | class RescueCorruptLeadingGarbageSingleTest (RescueCorruptLeadingGarbageTestBase): |
907 | VOLUMES = 1 | |
908 | ||
b9cf4a0f | 909 | @unittest.skipIf(sys.version_info < (3, 4), "requires recent os library") |
afb2d647 PG |
910 | class RescueCorruptLeadingGarbageMultiTest (RescueCorruptLeadingGarbageTestBase): |
911 | # the last object in first archive has extra bytes somewhere in the | |
912 | # middle because tar itself performs no data checksumming. | |
913 | MISMATCHES = 2 | |
914 | VOLUMES = 3 | |
915 | ||
916 | ||
2fe5f6e7 PG |
917 | ############################################################################### |
918 | # index | |
919 | ############################################################################### | |
920 | ||
921 | class GenIndexIntactBaseTest (GenIndexTest): | |
922 | """ | |
923 | """ | |
924 | COMPRESSION = None | |
925 | PASSWORD = None | |
926 | FAILURES = 0 | |
927 | CORRUPT = immaculate | |
928 | VOLUMES = 1 | |
929 | MISMATCHES = 1 | |
930 | ||
047239f3 PG |
931 | class GenIndexIntactSingleTest (GenIndexIntactBaseTest): |
932 | pass | |
933 | ||
934 | class GenIndexIntactSingleGZTest (GenIndexIntactBaseTest): | |
935 | COMPRESSION = "#gz" | |
936 | MISSING = 2 | |
937 | ||
938 | class GenIndexIntactSingleGZAESTest (GenIndexIntactBaseTest): | |
939 | COMPRESSION = "#gz" | |
940 | PASSWORD = TEST_PASSWORD | |
941 | MISSING = 2 | |
2fe5f6e7 | 942 | |
047239f3 PG |
943 | class GenIndexIntactMultiTest (GenIndexIntactBaseTest): |
944 | VOLUMES = 3 | |
2fe5f6e7 PG |
945 | pass |
946 | ||
047239f3 PG |
947 | class GenIndexIntactMultiGZTest (GenIndexIntactBaseTest): |
948 | VOLUMES = 3 | |
2fe5f6e7 PG |
949 | COMPRESSION = "#gz" |
950 | MISSING = 2 | |
951 | ||
047239f3 PG |
952 | class GenIndexIntactMultiGZAESTest (GenIndexIntactBaseTest): |
953 | VOLUMES = 3 | |
2fe5f6e7 PG |
954 | COMPRESSION = "#gz" |
955 | PASSWORD = TEST_PASSWORD | |
956 | MISSING = 2 | |
957 | ||
6e1f5355 PG |
958 | |
959 | class GenIndexCorruptHoleBaseTest (GenIndexTest): | |
960 | """ | |
961 | Recreate index from file with hole. | |
962 | """ | |
963 | COMPRESSION = None | |
964 | PASSWORD = None | |
965 | FAILURES = 0 | |
966 | CORRUPT = corrupt_hole | |
967 | VOLUMES = 1 | |
968 | MISMATCHES = 1 | |
969 | ||
970 | class GenIndexCorruptHoleTest (GenIndexCorruptHoleBaseTest): | |
971 | pass | |
972 | ||
973 | class GenIndexCorruptHoleGZTest (GenIndexCorruptHoleBaseTest): | |
974 | COMPRESSION = "#gz" | |
975 | MISSING = 2 | |
976 | ||
977 | class GenIndexCorruptHoleGZAESTest (GenIndexCorruptHoleBaseTest): | |
978 | COMPRESSION = "#gz" | |
979 | PASSWORD = TEST_PASSWORD | |
980 | MISSING = 2 | |
981 | ||
982 | ||
983 | ||
984 | class GenIndexCorruptEntireHeaderBaseTest (GenIndexTest): | |
985 | """ | |
986 | Recreate index from file whose first object header is corrupted. | |
987 | """ | |
988 | COMPRESSION = None | |
989 | PASSWORD = None | |
990 | FAILURES = 0 | |
991 | CORRUPT = corrupt_entire_header | |
992 | VOLUMES = 1 | |
993 | MISMATCHES = 1 | |
994 | ||
995 | class GenIndexCorruptEntireHeaderTest (GenIndexCorruptEntireHeaderBaseTest): | |
996 | pass | |
997 | ||
998 | class GenIndexCorruptEntireHeaderGZTest (GenIndexCorruptEntireHeaderBaseTest): | |
999 | COMPRESSION = "#gz" | |
1000 | MISSING = 2 | |
1001 | ||
1002 | class GenIndexCorruptEntireHeaderGZAESTest (GenIndexCorruptEntireHeaderBaseTest): | |
1003 | COMPRESSION = "#gz" | |
1004 | PASSWORD = TEST_PASSWORD | |
1005 | MISSING = 2 | |
1006 |