add test corrupting an entire volume
[python-delta-tar] / testing / test_recover.py
CommitLineData
fbdc9f4a
PG
1import logging
2import os
3import shutil
4
5import deltatar.deltatar as deltatar
3267933a 6import deltatar.crypto as crypto
203cb25e 7import deltatar.tarfile as tarfile
fbdc9f4a
PG
8
9from . import BaseTest
10
# Password handed to DeltaTar by the test cases that encrypt their archives.
TEST_PASSWORD = "test1234"

# Reference volume size (in MB) used to size the files of multi-volume tests.
TEST_VOLSIZ = 2

# Number of generated files that are meant to share one volume.
TEST_FILESPERVOL = 3

# Fudge factor accounting for tar overhead when fitting files into volumes;
# the value is empirical ("black magic").
VOLUME_OVERHEAD = 1.4

# Chunk size in bytes used when overwriting a volume with zeros.
TEST_BLOCKSIZE = 4096
96fe6399
PG
17
18###############################################################################
19## helpers ##
20###############################################################################
21
3267933a
PG
def flip_bits(fname, off, b=0x01, n=1):
    """
    XOR *n* bytes of file *fname*, beginning at byte offset *off*, with the
    mask *b* and write the result back in place.

    With the default arguments exactly one byte has its lowest bit toggled.
    """
    handle = os.open(fname, os.O_RDWR)

    def rewind():
        # (re-)position the descriptor at *off* and verify where it landed
        where = os.lseek(handle, off, os.SEEK_SET)
        assert where == off

    try:
        rewind()
        original = os.read(handle, n)
        flipped = bytes(byte ^ b for byte in original)
        # reading moved the file position; go back before overwriting
        rewind()
        os.write(handle, flipped)
    finally:
        os.close(handle)
39
203cb25e
PG
40
def gz_header_size(fname, off=0):
    """
    Determine the length of the gzip header starting at *off* in file
    *fname*.

    The header is variable length because it may contain the file name as
    NUL terminated bytes. The scan begins ``tarfile.GZ_HEADER_SIZE`` bytes
    after *off* and stops at the first NUL byte found.

    Returns the distance in bytes between *off* and that NUL byte. For the
    default ``off=0`` this is the absolute file offset of the terminator.

    Raises ``ValueError`` if the file ends before a NUL byte was seen.
    """
    # honor the caller supplied start offset instead of discarding it
    pos = off + tarfile.GZ_HEADER_SIZE
    fd = os.open(fname, os.O_RDONLY)

    try:
        where = os.lseek(fd, pos, os.SEEK_SET)
        assert where == pos
        while True:
            # each one byte read advances the file position, so no
            # repositioning is needed between iterations
            byte = os.read(fd, 1)
            if not byte:
                raise ValueError("%s: reached end of file before the NUL "
                                 "terminator of the gzip header at offset %d"
                                 % (fname, off))
            if byte[0] == 0:
                break
            pos += 1
    finally:
        os.close(fd)

    return pos - off
63
da8996f0 64
96fe6399
PG
def is_pdt_encrypted(fname):
    """
    Returns true if the file contains at least one PDT header plus enough
    space for the object.

    Only the first header in *fname* is inspected: it is parsed with
    ``crypto.hdr_read_stream`` and the file must then supply at least the
    ``ctsize`` bytes announced by that header. Any error while opening,
    parsing or reading makes the function return false.
    """
    try:
        with open(fname, "rb") as st:
            hdr = crypto.hdr_read_stream(st)
            siz = hdr["ctsize"]
            # compare explicitly instead of asserting: assert statements are
            # removed when Python runs with -O, which would turn this check
            # into a no-op
            return len(st.read(siz)) == siz
    except Exception:
        # deliberately best effort: whatever goes wrong, the file does not
        # qualify as PDT encrypted
        return False
78
79
00b8c150
PG
def corrupt_header(_, fname, compress, encrypt):
    """
    Damage a single significant byte in the first object header of *fname*.

    The byte is picked according to the archive format:

      * encrypted: a byte of the GCM tag,
      * compressed only: a byte of the magic,
      * plain tar: a byte of the header checksum.

    The first positional argument is unused.
    """
    if encrypt is True:
        target = crypto.HDR_OFF_TAG + 1
    elif compress is True:
        target = 1
    else:
        # Sizes of the fields preceding the checksum, from tar(5):
        #
        #       struct header_gnu_tar {
        #               char name[100];
        #               char mode[8];
        #               char uid[8];
        #               char gid[8];
        #               char size[12];
        #               char mtime[12];
        #               char checksum[8];
        #               …
        fields_before_checksum = (100, 8, 8, 8, 12, 12)
        target = sum(fields_before_checksum) + 1

    flip_bits(fname, target)
100
101
da8996f0
PG
def corrupt_entire_header(_, fname, compress, encrypt):
    """
    Invert every bit of the first object header in *fname*.

    The number of bytes treated as header depends on the format:
    ``crypto.PDTCRYPT_HDR_SIZE`` for encrypted archives, the value of
    ``gz_header_size()`` for compressed ones and ``tarfile.BLOCKSIZE`` for
    plain tar. The first positional argument is unused.
    """
    if encrypt is True:
        hdr_len = crypto.PDTCRYPT_HDR_SIZE
    elif compress is True:
        hdr_len = gz_header_size(fname)
    else:
        hdr_len = tarfile.BLOCKSIZE

    # XOR with 0xff inverts all bits of each byte
    flip_bits(fname, 0, 0xff, hdr_len)
112
113
00b8c150
PG
def corrupt_payload_start(_, fname, compress, encrypt):
    """
    Modify a byte right behind the first object header of *fname*.

    The header length is taken from ``crypto.PDTCRYPT_HDR_SIZE`` for
    encrypted archives, from ``gz_header_size()`` for compressed ones and
    from ``tarfile.BLOCKSIZE`` for plain tar; the byte at that length plus
    one is then handed to ``flip_bits()``. The first positional argument is
    unused.
    """
    if encrypt is True:
        hdr_len = crypto.PDTCRYPT_HDR_SIZE
    elif compress is True:
        hdr_len = gz_header_size(fname)
    else:
        hdr_len = tarfile.BLOCKSIZE

    flip_bits(fname, hdr_len + 1)
124
125
517d35b7
PG
def corrupt_trailing_data(_, fname, compress, encrypt):
    """
    Append 42 bytes of random junk to the end of *fname*.

    The existing content of the file is left untouched. The first positional
    argument as well as *compress* and *encrypt* are unused; the junk is
    appended regardless of the archive format.
    """
    junk = os.urandom(42)
    fd = os.open(fname, os.O_WRONLY | os.O_APPEND)
    try:
        os.write(fd, junk)
    finally:
        # release the descriptor even if the write fails
        os.close(fd)
134
00b8c150 135
20e1d773
PG
def corrupt_volume(_, fname, compress, encrypt):
    """
    Zero out an entire volume.

    Every byte of *fname* is overwritten with NUL in chunks of at most
    ``TEST_BLOCKSIZE`` bytes; the size of the file does not change. The
    first positional argument as well as *compress* and *encrypt* are
    unused.
    """
    fd = os.open(fname, os.O_WRONLY)
    try:
        size = os.lseek(fd, 0, os.SEEK_END)
        # Rewind outside of the assert statement: assertions are removed
        # under -O and the seek must happen regardless, otherwise the zeros
        # would be appended instead of overwriting the content.
        start = os.lseek(fd, 0, os.SEEK_SET)
        assert start == 0
        zeros = bytes(TEST_BLOCKSIZE)
        while size > 0:
            todo = min(size, TEST_BLOCKSIZE)
            # os.write may write less than requested; only count what was
            # actually written
            size -= os.write(fd, zeros[:todo])
    finally:
        # release the descriptor even if seeking or writing fails
        os.close(fd)
149
150
96fe6399
PG
151###############################################################################
152## tests ##
153###############################################################################
203cb25e 154
fbdc9f4a
PG
class RecoverTest(BaseTest):
    """
    Disaster recovery: restore corrupt backups.

    The scenario is configured through class attributes which subclasses
    override; the single test method creates a full backup, damages its
    first volume and checks the outcome of the recovery.
    """

    COMPRESSION = None  # DeltaTar mode string; None selects "#"
    PASSWORD = None     # encryption password; None disables encryption
    FAILURES = 0        # files that could not be restored
    MISMATCHES = 0      # files that were restored but corrupted
    CORRUPT = corrupt_payload_start  # callable that damages the first volume
    VOLUMES = 1         # values above one add extra files to the source
    MISSING = None      # normally the number of failures

    def setUp(self):
        """
        Create base test data
        """
        self.pwd = os.getcwd()
        self.dst_path = "source_dir"
        self.src_path = "%s2" % self.dst_path
        self.hash = {}

        os.system('rm -rf target_dir source_dir* backup_dir* huge')
        os.makedirs(self.src_path)

        for i in range(5):
            name = "dummy_%d" % i
            path = "%s/%s" % (self.src_path, name)
            self.hash[name] = self.create_file(path, 5 + i)

    def tearDown(self):
        """
        Remove temporary files created by unit tests and reset globals.
        """
        os.chdir(self.pwd)
        os.system("rm -rf source_dir source_dir2 backup_dir*")

    def test_recover_corrupt(self):
        """
        Perform various damaging actions that cause unreadable objects.

        An intact backup must restore completely. After the first volume
        was damaged, a normal restore is allowed to fail with one of the
        errors expected for the format. Recovery must then report
        ``FAILURES`` failed objects, leave ``MISSING`` (by default
        ``FAILURES``) files absent and ``MISMATCHES`` files with a wrong
        checksum.
        """
        compressed = self.COMPRESSION is not None
        encrypted = self.PASSWORD is not None

        mode = self.COMPRESSION or "#"
        bak_path = "backup_dir"

        # file name suffix shared by the volumes and the index
        suffix = ""
        if compressed:
            suffix += ".gz"
        if encrypted:
            suffix = "%s.%s" % (suffix, deltatar.PDTCRYPT_EXTENSION)

        backup_file = "the_full_backup_%0.2d.tar" + suffix
        backup_full = ("%s/%s" % (bak_path, "the_full_backup_%0.2d.tar")) % 0 \
                      + suffix
        index_file = "the_full_index" + suffix
        index_path = "%s/%s" % (bak_path, index_file)

        if self.VOLUMES > 1:
            # add n files for one nth the volume size each, corrected
            # for metadata and tar block overhead
            fsiz = int((TEST_VOLSIZ
                        / (TEST_FILESPERVOL * VOLUME_OVERHEAD))
                       * 1024 * 1024)
            fcnt = (self.VOLUMES - 1) * TEST_FILESPERVOL
            for i in range(fcnt):
                nvol, invol = divmod(i, TEST_FILESPERVOL)
                name = "dummy_vol_%d_n_%0.2d" % (nvol, invol)
                self.hash[name] = self.create_file(
                    "%s/%s" % (self.src_path, name), fsiz, random=True)

        def vname(_x, _y, n, *a, **kwa):
            return backup_file % n

        dtar = deltatar.DeltaTar(mode=mode,
                                 logger=None,
                                 password=self.PASSWORD,
                                 index_name_func=lambda _: index_file,
                                 volume_name_func=vname)

        dtar.create_full_backup(source_path=self.src_path,
                                backup_path=bak_path,
                                max_volume_size=1)

        if encrypted:
            # ensure all files are at least superficially in PDT format
            for entry in os.listdir(bak_path):
                assert is_pdt_encrypted("%s/%s" % (bak_path, entry))

        # first restore must succeed
        dtar.restore_backup(target_path=self.dst_path,
                            backup_indexes_paths=[index_path])
        for name, digest in self.hash.items():
            restored = "%s/%s" % (self.dst_path, name)
            assert os.path.exists(restored)
            assert digest == self.md5sum(restored)
        shutil.rmtree(self.dst_path)
        shutil.rmtree(self.src_path)

        self.CORRUPT(backup_full, compressed, encrypted)

        # normal restore must fail
        try:
            dtar.restore_backup(target_path=self.dst_path,
                                backup_tar_path=backup_full)
        except tarfile.CompressionError:
            # only acceptable for compressed or encrypted archives
            if not encrypted and not compressed:
                raise
        except tarfile.ReadError:
            # can happen with all three modes
            pass
        except tarfile.DecryptionError:
            # only acceptable for encrypted archives
            if not encrypted:
                raise

        # the working directory is not restored due to the error above
        os.chdir(self.pwd)

        # but recover will succeed
        failed = dtar.recover_backup(target_path=self.dst_path,
                                     backup_indexes_paths=[index_path])

        assert len(failed) == self.FAILURES

        # sort the expected files into absent ones and ones with bad content
        missing = []
        mismatch = []
        for name, digest in self.hash.items():
            restored = "%s/%s" % (self.dst_path, name)
            if not os.path.exists(restored):
                missing.append(name)
            elif digest != self.md5sum(restored):
                mismatch.append(name)

        # usually, an object whose extraction fails will not be found on
        # disk afterwards so the number of failures equals that of missing
        # files. however, some modes will create partial files for objects
        # spanning multiple volumes that contain the parts whose checksums
        # were valid.
        if self.MISSING is None:
            expected_missing = self.FAILURES
        else:
            expected_missing = self.MISSING
        assert len(missing) == expected_missing
        assert len(mismatch) == self.MISMATCHES

        shutil.rmtree(self.dst_path)
315
316
class RecoverCorruptPayloadTestBase(RecoverTest):
    """Payload corruption of an uncompressed, unencrypted backup."""
    PASSWORD = None
    COMPRESSION = None
    # tarfile will restore the damaged object anyway; the corruption is only
    # revealed by the hash
    MISMATCHES = 1
    FAILURES = 0


class RecoverCorruptPayloadSingleTest(RecoverCorruptPayloadTestBase):
    """Single volume variant."""
    VOLUMES = 1


class RecoverCorruptPayloadMultiTest(RecoverCorruptPayloadTestBase):
    """Multi volume variant."""
    VOLUMES = 3
328
00b8c150 329
class RecoverCorruptPayloadGZTestBase(RecoverTest):
    """Payload corruption of a compressed, unencrypted backup."""
    PASSWORD = None
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 1


class RecoverCorruptPayloadGZSingleTest(RecoverCorruptPayloadGZTestBase):
    """Single volume variant."""
    VOLUMES = 1


class RecoverCorruptPayloadGZMultiTest(RecoverCorruptPayloadGZTestBase):
    """Multi volume variant."""
    VOLUMES = 3
341
342
class RecoverCorruptPayloadGZAESTestBase(RecoverTest):
    """Payload corruption of a compressed and encrypted backup."""
    PASSWORD = TEST_PASSWORD
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 1


class RecoverCorruptPayloadGZAESSingleTest(RecoverCorruptPayloadGZAESTestBase):
    """Single volume variant."""
    VOLUMES = 1


class RecoverCorruptPayloadGZAESMultiTest(RecoverCorruptPayloadGZAESTestBase):
    """Multi volume variant."""
    VOLUMES = 3
00b8c150 354
e25f31ac
PG
355
class RecoverCorruptHeaderTestBase(RecoverTest):
    """Single byte header damage in an uncompressed, unencrypted backup."""
    CORRUPT = corrupt_header
    PASSWORD = None
    COMPRESSION = None
    MISMATCHES = 0
    FAILURES = 1


class RecoverCorruptHeaderSingleTest(RecoverCorruptHeaderTestBase):
    """Single volume variant."""
    VOLUMES = 1


class RecoverCorruptHeaderMultiTest(RecoverCorruptHeaderTestBase):
    """Multi volume variant."""
    VOLUMES = 3
368
0349168a 369
class RecoverCorruptHeaderGZTestBase(RecoverTest):
    """Single byte header damage in a compressed, unencrypted backup."""
    CORRUPT = corrupt_header
    PASSWORD = None
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 1


class RecoverCorruptHeaderGZSingleTest(RecoverCorruptHeaderGZTestBase):
    """Single volume variant."""
    VOLUMES = 1


class RecoverCorruptHeaderGZMultiTest(RecoverCorruptHeaderGZTestBase):
    """Multi volume variant."""
    VOLUMES = 3
382
383
class RecoverCorruptHeaderGZAESTestBase(RecoverTest):
    """Single byte header damage in a compressed and encrypted backup."""
    CORRUPT = corrupt_header
    PASSWORD = TEST_PASSWORD
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 1


class RecoverCorruptHeaderGZAESSingleTest(RecoverCorruptHeaderGZAESTestBase):
    """Single volume variant."""
    VOLUMES = 1


class RecoverCorruptHeaderGZAESMultiTest(RecoverCorruptHeaderGZAESTestBase):
    """Multi volume variant."""
    VOLUMES = 3
da8996f0 396
e25f31ac
PG
397
class RecoverCorruptEntireHeaderTestBase(RecoverTest):
    """Whole header damage in an uncompressed, unencrypted backup."""
    CORRUPT = corrupt_entire_header
    PASSWORD = None
    COMPRESSION = None
    MISMATCHES = 0
    FAILURES = 1


class RecoverCorruptEntireHeaderSingleTest(RecoverCorruptEntireHeaderTestBase):
    """Single volume variant."""
    VOLUMES = 1


class RecoverCorruptEntireHeaderMultiTest(RecoverCorruptEntireHeaderTestBase):
    """Multi volume variant."""
    VOLUMES = 3
410
da8996f0 411
class RecoverCorruptEntireHeaderGZTestBase(RecoverTest):
    """Whole header damage in a compressed, unencrypted backup."""
    CORRUPT = corrupt_entire_header
    PASSWORD = None
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 1


class RecoverCorruptEntireHeaderGZSingleTest(RecoverCorruptEntireHeaderGZTestBase):
    """Single volume variant."""
    VOLUMES = 1


class RecoverCorruptEntireHeaderGZMultiTest(RecoverCorruptEntireHeaderGZTestBase):
    """Multi volume variant."""
    VOLUMES = 3
424
425
class RecoverCorruptEntireHeaderGZAESTestBase(RecoverTest):
    """Whole header damage in a compressed and encrypted backup."""
    CORRUPT = corrupt_entire_header
    PASSWORD = TEST_PASSWORD
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 1


class RecoverCorruptEntireHeaderGZAESSingleTest(RecoverCorruptEntireHeaderGZAESTestBase):
    """Single volume variant."""
    VOLUMES = 1


class RecoverCorruptEntireHeaderGZAESMultiTest(RecoverCorruptEntireHeaderGZAESTestBase):
    """Multi volume variant."""
    VOLUMES = 3
517d35b7 438
e25f31ac
PG
439
class RecoverCorruptTrailingDataTestBase(RecoverTest):
    """Junk appended to an uncompressed, unencrypted backup."""
    # plain tar is indifferent to trailing data and the results are
    # consistent
    CORRUPT = corrupt_trailing_data
    PASSWORD = None
    COMPRESSION = None
    MISMATCHES = 0
    FAILURES = 0


class RecoverCorruptTrailingDataSingleTest(RecoverCorruptTrailingDataTestBase):
    """Single volume variant."""
    VOLUMES = 1


class RecoverCorruptTrailingDataMultiTest(RecoverCorruptTrailingDataTestBase):
    """Multi volume variant."""
    VOLUMES = 3
    # the last object in the first archive has extra bytes somewhere in the
    # middle because tar itself performs no data checksumming.
    MISMATCHES = 1
457
517d35b7 458
class RecoverCorruptTrailingDataGZTestBase(RecoverTest):
    """Junk appended to a compressed, unencrypted backup."""
    # reading past the final object will cause decompression failure;
    # all objects except for the last survive unharmed though
    CORRUPT = corrupt_trailing_data
    PASSWORD = None
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 1


class RecoverCorruptTrailingDataGZSingleTest(RecoverCorruptTrailingDataGZTestBase):
    """Single volume variant."""
    VOLUMES = 1


class RecoverCorruptTrailingDataGZMultiTest(RecoverCorruptTrailingDataGZTestBase):
    """Multi volume variant."""
    VOLUMES = 3
    # the last file of the first volume will only contain the data of the
    # second part which is contained in the second volume. this happens
    # because the CRC32 is wrong for the first part so it gets discarded,
    # then the object is recreated from the first header of the second
    # volume, containing only the remainder of the data.
    MISMATCHES = 1
    MISSING = 0
e25f31ac
PG
480
481
class RecoverCorruptTrailingDataGZAESTestBase(RecoverTest):
    """Junk appended to a compressed and encrypted backup."""
    CORRUPT = corrupt_trailing_data
    PASSWORD = TEST_PASSWORD
    COMPRESSION = "#gz"
    MISMATCHES = 0
    FAILURES = 0


class RecoverCorruptTrailingDataGZAESSingleTest(RecoverCorruptTrailingDataGZAESTestBase):
    """Single volume variant."""
    VOLUMES = 1


class RecoverCorruptTrailingDataGZAESMultiTest(RecoverCorruptTrailingDataGZAESTestBase):
    """Multi volume variant."""
    VOLUMES = 3
517d35b7 494
20e1d773
PG
495
class RecoverCorruptVolumeBaseTest(RecoverTest):
    """First volume zeroed out in an uncompressed, unencrypted backup."""
    CORRUPT = corrupt_volume
    VOLUMES = 3
    PASSWORD = None
    COMPRESSION = None
    FAILURES = 8


class RecoverCorruptVolumeTest(RecoverCorruptVolumeBaseTest):
    """Runs the base scenario unchanged."""


class RecoverCorruptVolumeGZTest(RecoverTest):
    """First volume zeroed out in a compressed, unencrypted backup."""
    CORRUPT = corrupt_volume
    VOLUMES = 3
    PASSWORD = None
    COMPRESSION = "#gz"
    FAILURES = 8


class RecoverCorruptVolumeGZAESTest(RecoverTest):
    """First volume zeroed out in a compressed and encrypted backup."""
    CORRUPT = corrupt_volume
    VOLUMES = 3
    PASSWORD = TEST_PASSWORD
    COMPRESSION = "#gz"
    FAILURES = 8
519