From dbd6ff685c7503076ff76a3b6c83ed44cef66dac Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Thu, 31 Aug 2017 10:57:17 +0200 Subject: [PATCH] describe corruption mechanisms and their function in testing --- testing/test_recover.py | 66 +++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 64 insertions(+), 2 deletions(-) diff --git a/testing/test_recover.py b/testing/test_recover.py index a5016c0..a8ebc55 100644 --- a/testing/test_recover.py +++ b/testing/test_recover.py @@ -1,3 +1,65 @@ +""" +Intra2net 2017 + +=============================================================================== + test_recover.py – behavior facing file corruption +=============================================================================== + +Corruptors have the signature ``(unittest × string × bool × bool) → void``, +where the *string* argument is the name of the file to modify, the *booleans* +specialize the operation for compressed and encrypted data. Issues are +communicated upward by throwing. + + - corrupt_header (): + Modify the first object header where it hurts. With encryption, the tag + is corrupted to cause authentication of the decrypted data to fail. For + compressed data, the two byte magic is altered, for uncompressed + archives, the tar header checksum field. + + - corrupt_ctsize (): + Modify the *ctsize* field of a PDTCRYPT header. The goal is to have + decryption continue past the end of the object, causing data + authentication to fail and file reads to be at odds with the offsets in + the index. Only applicable to encrypted archives; will raise + *UndefinedTest* otherwise. + + - corrupt_entire_header (): + Invert all bits of the first object header (PDTCRYPT, gzip, tar) without + affecting the payload. This renders the object unreadable; the file will + resemble one with arbitrary leading data but all the remaining object + offsets intact, so the contents can still be extracted with index based + recovery. 
+ + - corrupt_payload_start (): + For all header variants, skip to the first byte past the header and + corrupt it. Encrypted objects will fail to authenticate. Compressed + objects will yield a bad CRC32. The Tar layer will take no notice but + the extracted object will fail an independent checksum comparison with + that of the original file. + + - corrupt_leading_garbage (): + Prepend random data to an otherwise valid file. Creates a situation that + index based recovery cannot handle by shifting the offsets of all objects + in the file. In rescue mode, these objects must be located and extracted + regardless. + + - corrupt_trailing_data (): + Append data to an otherwise valid file. Both the recovery and rescue + modes must be able to retrieve all objects from that file. + + - corrupt_volume (): + Zero out an entire backup file. This is interesting for multivolume + tests: all files from the affected volume must be missing but objects + that span volume bounds will still be partially recoverable. + + - corrupt_hole (): + Remove a region from a file. Following the damaged part, no object can be + recovered in index mode, but rescue mode will still find those. The + object containing the start of the hole will fail checksum tests because + of the missing part and the overlap with the subsequent object. + +""" + import logging import os import shutil @@ -130,7 +192,7 @@ def corrupt_entire_header (_, fname, compress, encrypt): """ if encrypt is True: flip_bits (fname, 0, 0xff, crypto.PDTCRYPT_HDR_SIZE) - elif compress is True: # invalidate magic + elif compress is True: flip_bits (fname, 0, 0xff, gz_header_size (fname)) else: flip_bits (fname, 0, 0xff, tarfile.BLOCKSIZE) @@ -183,7 +245,7 @@ def corrupt_leading_garbage (_, fname, compress, encrypt): def corrupt_trailing_data (_, fname, compress, encrypt): """ - Modify the byte following the object header structure of the format. + Append random data to file. 
""" junk = os.urandom (42) fd = os.open (fname, os.O_WRONLY | os.O_APPEND) -- 1.7.1