From 704ceaa5b8fed2807c47b9ef4a0b997ba6e222cc Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Mon, 15 May 2017 17:44:48 +0200 Subject: [PATCH] extend crypto.py documentation --- deltatar/crypto.py | 284 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 files changed, 271 insertions(+), 13 deletions(-) diff --git a/deltatar/crypto.py b/deltatar/crypto.py index 782d4af..df353db 100755 --- a/deltatar/crypto.py +++ b/deltatar/crypto.py @@ -4,7 +4,7 @@ Intra2net 2017 =============================================================================== - crypto -- Encryption Layer for the Intra2net Backup + crypto -- Encryption Layer for the Deltatar Backup =============================================================================== Crypto stack: @@ -32,13 +32,14 @@ Errors Errors fall into roughly three categories: - - Cryptographical errors or data validity. + - Cryptographical errors or invalid data. - ``InvalidGCMTag`` (decryption failed on account of an invalid GCM tag), - ``InvalidIVFixedPart`` (IV fixed part of object not found in list), - ``DuplicateIV`` (the IV of an encrypted object already occurred), - - ``DecryptionError`` (used in CLI decryption). + - ``DecryptionError`` (used in CLI decryption for presenting error + conditions to the user). - Incorrect usage of the library. @@ -60,6 +61,68 @@ Initialization Vectors ------------------------------------------------------------------------------- Initialization vectors are checked reuse during the lifetime of a decryptor. +The fixed counters for metadata files cannot be reused and attempts to do so +will cause a DuplicateIV error. This means the length of objects encrypted with +a metadata counter is capped at 63 GB. + +For ordinary, non-metadata payload, there is an optional mode with strict IV +checking that causes a crypto context to fail if an IV encountered or created +was already used for decrypting or encrypting, respectively, an earlier object. 
+Note that this mode can trigger false positives when decrypting non-linearly, +e. g. when traversing the same object multiple times. Since the crypto context +has no notion of a position in a PDT encrypted archive, this condition must be +sorted out downstream. + +Command Line Utility +------------------------------------------------------------------------------- + +``crypto.py`` may be invoked as a script for decrypting, validating, and +splitting PDT encrypted files. Consult the usage message for details. + +Usage examples: + +Decrypt from stdin using the password ‘foo’: :: + + $ crypto.py process foo -i - -o - some-file.tar.gz + +Output verbose information about the encrypted objects in the archive: :: + + $ crypto.py process foo -v -i some-file.tar.gz.pdtcrypt -o /dev/null + PDT: decrypt from some-file.tar.gz.pdtcrypt + PDT: decrypt to /dev/null + PDT: source: file some-file.tar.gz.pdtcrypt + PDT: sink: file /dev/null + PDT: 0 hdr + PDT: · version = 1 : 0100 + PDT: · paramversion = 1 : 0100 + PDT: · nacl : d270 b031 00d1 87e2 c946 610d 7b7f 7e5f + PDT: · iv : 02ee 3dd7 a963 1eb1 0100 0000 + PDT: · ctsize = 591 : 4f02 0000 0000 0000 + PDT: · tag : 5b2d 6d8b 8f82 4842 12fd 0b10 b6e3 369b + PDT: 64 decrypt obj no. 1, 591 B + PDT: · [64] 0% done, read block (591 B of 591 B remaining) + PDT: · decrypt ciphertext 591 B + PDT: · decrypt plaintext 591 B + PDT: 655 finalize + … + +Also, the mode *scrypt* allows deriving encryption keys. To calculate the +encryption key from the password ‘foo’ and the salt of the first object in a +PDT encrypted file: :: + + $ crypto.py scrypt foo -i some-file.pdtcrypt + {"scrypt_params": {"r": 8, "dkLen": 16, "p": 1, "N": 65536}, "hash": "b'c2941dfc6e3e65e8e887f1702b1091a3'", "salt": "b'd270b03100d187e2c946610d7b7f7e5f'"} + +The computed 16 byte key is given in hexadecimal notation in the value to +``hash`` and can be fed into Python’s ``binascii.unhexlify()`` to obtain the +corresponding binary representation. 
+ +Note that in Scrypt hashing mode, no data integrity checks are being performed. +If the wrong password is given, a wrong key will be derived. Whether the password +was indeed correct can only be determined by decrypting. Note that since PDT +archives essentially consist of a stream of independent objects, the salt and +other parameters may change. Thus a key derived using the above method from the +first object doesn’t necessarily apply to any of the subsequent objects. """ @@ -249,9 +312,8 @@ AES_GCM_IV_CNT_MAX = AES_GCM_IV_CNT_MAX_DEFAULT # , paramversion : u16 # , nacl : [u8; 16] # , iv : [u8; 12] -# , ctsize : usize } -# -# tag : [u8; 16] +# , ctsize : usize +# , tag : [u8; 16] } # # fn hdr_read (f : handle) -> hdrinfo; # fn hdr_make (f : handle, h : hdrinfo) -> IOResult; @@ -259,6 +321,12 @@ AES_GCM_IV_CNT_MAX = AES_GCM_IV_CNT_MAX_DEFAULT # def hdr_read (data): + """ + Read bytes as header structure. + + If the input could not be interpreted as a header, fail with + ``InvalidHeader``. + """ try: mag, version, paramversion, nacl, iv, ctsize, tag = \ @@ -282,6 +350,12 @@ def hdr_read (data): def hdr_read_stream (instr): + """ + Read header from stream at the current position. + + Fail with ``InvalidHeader`` if insufficient bytes were read from the + stream, or if the content could not be interpreted as a header. + """ data = instr.read(PDTCRYPT_HDR_SIZE) ldata = len (data) if ldata == 0: @@ -293,6 +367,16 @@ def hdr_read_stream (instr): def hdr_from_params (version, paramversion, nacl, iv, ctsize, tag): + """ + Assemble the necessary values into a PDTCRYPT header. 
+ + :type version: int to fit uint16_t + :type paramversion: int to fit uint16_t + :type nacl: bytes to fit uint8_t[16] + :type iv: bytes to fit uint8_t[12] + :type ctsize: int to fit uint64_t + :type tag: bytes to fit uint8_t[16] + """ buf = bytearray (PDTCRYPT_HDR_SIZE) bufv = memoryview (buf) @@ -317,6 +401,9 @@ def hdr_make_dummy (s): def hdr_make (hdr): + """ + Assemble a header from the given header structure. + """ return hdr_from_params (version=hdr.get("version"), paramversion=hdr.get("paramversion"), nacl=hdr.get("nacl"), iv=hdr.get("iv"), @@ -327,6 +414,7 @@ HDR_FMT = "I2n_header { version: %d, paramversion: %d, nacl: %s[%d]," \ " iv: %s[%d], ctsize: %d, tag: %s[%d] }" def hdr_fmt (h): + """Format a header structure into readable output.""" return HDR_FMT % (h["version"], h["paramversion"], binascii.hexlify (h["nacl"]), len(h["nacl"]), binascii.hexlify (h["iv"]), len(h["iv"]), @@ -335,6 +423,7 @@ def hdr_fmt (h): def hex_spaced_of_bytes (b): + """Format bytes object, hexdump style.""" return " ".join ([ "%.2x%.2x" % (c1, c2) for c1, c2 in zip (b[0::2], b[1::2]) ]) \ + (len (b) | 1 == len (b) and " %.2x" % b[-1] or "") # odd lengths @@ -365,6 +454,11 @@ tag : %s """ def hdr_fmt_pretty (h): + """ + Format header structure into multi-line representation of its contents and + their raw representation. (Omit the implicit “PDTCRYPT” magic bytes that + precede every header.) + """ return HDR_FMT_PRETTY \ % (h["version"], hex_spaced_of_bytes (struct.pack (FMT_UINT16_LE, h["version"])), @@ -379,6 +473,7 @@ def hdr_fmt_pretty (h): IV_FMT = "((f %s) (c %d))" def iv_fmt (iv): + """Format the two components of an IV in a readable fashion.""" fixed, cnt = struct.unpack (FMT_I2N_IV, iv) return IV_FMT % (binascii.hexlify (fixed), cnt) @@ -405,6 +500,10 @@ class PassthroughCipher (object): def kdf_dummy (klen, password, _nacl): + """ + Fake KDF for testing purposes that is called when parameter version zero is + encountered. 
+ """ q, r = divmod (klen, len (password)) if isinstance (password, bytes) is False: password = password.encode () @@ -415,6 +514,11 @@ SCRYPT_KEY_MEMO = { } # static because needed for both the info file and the arc def kdf_scrypt (params, password, nacl): + """ + Wrapper for the Scrypt KDF, corresponds to parameter version one. The + computation result is memoized based on the inputs to facilitate spawning + multiple encryption contexts. + """ N = params["N"] r = params["r"] p = params["p"] @@ -432,6 +536,12 @@ def kdf_scrypt (params, password, nacl): def kdf_by_version (paramversion=None, defs=None): + """ + Pick the KDF handler corresponding to the parameter version or the + definition set. + + :rtype: function (password : str, nacl : str) -> str + """ if paramversion is not None: defs = ENCRYPTION_PARAMETERS.get(paramversion, None) if defs is None: @@ -488,6 +598,11 @@ def scrypt_hashsource (pw, ins): def scrypt_hashfile (pw, fname): + """ + Calculate the SCRYPT hash from the password and the information contained + in the first header found in the given file. The header is read only at + offset zero. + """ with deptdcrypt_mk_stream (PDTCRYPT_SOURCE, fname or "-") as ins: hsh, _void, _void = scrypt_hashsource (pw, ins) return hsh @@ -525,11 +640,32 @@ class Crypto (object): self.set_parameters (*al, **akv) - def next_pfx (self): + def next_fixed (self): pass def set_object_counter (self, cnt=None): + """ + Safely set the internal counter of encrypted objects. Numerous + constraints apply: + + The same counter may not be reused in combination with one IV fixed + part. This is validated elsewhere in the IV handling. + + Counter zero is invalid. The first two counters are reserved for + metadata. The implementation does not allow for splitting metadata + files over multiple encrypted objects. (This would be possible by + assigning new fixed parts.) Thus in a Deltatar backup there is at most + one object with a counter value of one and two. 
On creation of a + context, the initial counter may be chosen. The globals + ``AES_GCM_IV_CNT_INFOFILE`` and ``AES_GCM_IV_CNT_INDEX`` can be used to + request one of the reserved values. If one of these values has been + used, any further attempt of setting the counter to that value will + be rejected with an ``InvalidFileCounter`` exception. + + Out of bounds values (i. e. below one and more than the maximum of 2³²) + cause an ``InvalidParameter`` exception to be thrown. + """ if cnt is None: self.cnt = AES_GCM_IV_CNT_DATA return @@ -552,15 +688,19 @@ class Crypto (object): return # cnt == AES_GCM_IV_CNT_MAX + 1 → wrap self.cnt = AES_GCM_IV_CNT_DATA - self.next_pfx () + self.next_fixed () def set_parameters (self, password=None, key=None, paramversion=None, - nacl=None, counter=None, nextpfx=None, + nacl=None, counter=None, next_fixed=None, strict_ivs=False): - if nextpfx is not None: - self.next_pfx = nextpfx - self.next_pfx () + """ + Configure the internal state of a crypto context. Not intended for + external use. + """ + if next_fixed is not None: + self.next_fixed = next_fixed + self.next_fixed () self.set_object_counter (counter) self.strict_ivs = strict_ivs @@ -581,6 +721,15 @@ class Crypto (object): def process (self, buf): + """ + Encrypt / decrypt a buffer. Invokes the ``.update()`` method on the + wrapped encryptor or decryptor, respectively. + + The Cryptography exception ``AlreadyFinalized`` is translated to an + ``InternalError`` at this point. It may occur in sound code when the GC + closes an encrypting stream after an error. Everywhere else it must be + treated as a bug. + """ if self.enc is None: raise RuntimeError ("process: context not initialized") self.stats ["in"] += len (buf) @@ -593,6 +742,12 @@ class Crypto (object): def next (self, password, paramversion, nacl, iv): + """ + Prepare for encrypting another object: Reset the data counters and + change the configuration in case one of the variable parameters differs + from the last object. 
Also check the IV for duplicates and error out + if strict checking was requested. + """ self.ctsize = 0 self.ptsize = 0 self.stats ["obj"] += 1 @@ -607,6 +762,11 @@ class Crypto (object): def check_duplicate_iv (self, iv): + """ + Add an IV (the 12 byte representation as in the header) to the list. With + strict checking enabled, this will throw a ``DuplicateIV``. Depending on + the context, this may indicate a serious error (IV reuse). + """ if self.strict_ivs is True and iv in self.used_ivs: raise DuplicateIV ("iv %s was reused" % iv_fmt (iv)) # vi has not been used before; add to collection @@ -614,6 +774,9 @@ class Crypto (object): def counters (self): + """ + Access the data counters. + """ return self.stats ["obj"], self.stats ["in"], self.stats ["out"] @@ -625,6 +788,24 @@ class Encrypt (Crypto): def __init__ (self, version, paramversion, password=None, key=None, nacl=None, counter=AES_GCM_IV_CNT_DATA, strict_ivs=True): + """ + The ctor will throw immediately if one of the parameters does not conform + to our expectations. + + counter=AES_GCM_IV_CNT_DATA, strict_ivs=True): + :type version: int to fit uint16_t + :type paramversion: int to fit uint16_t + :param password: mutually exclusive with ``key`` + :type password: bytes + :param key: mutually exclusive with ``password`` + :type key: bytes + :type nacl: bytes + :type counter: initial object counter the values + ``AES_GCM_IV_CNT_INFOFILE`` and + ``AES_GCM_IV_CNT_INDEX`` are unique in each backup set + and cannot be reused even with different fixed parts. 
+ :type strict_ivs: bool + """ if password is None and key is None \ or password is not None and key is not None : raise InvalidParameter ("__init__: need either key or password") @@ -671,15 +852,37 @@ class Encrypt (Crypto): self.paramenc = ENCRYPTION_PARAMETERS.get (paramversion) ["enc"] super().__init__ (password, key, paramversion, nacl, counter=counter, - nextpfx=lambda: self.fixed.append (os.urandom(8)), + next_fixed=lambda: self.fixed.append (os.urandom(8)), strict_ivs=strict_ivs) def iv_make (self): + """ + Construct a 12-bytes IV from the current fixed part and the object + counter. + """ return struct.pack(FMT_I2N_IV, self.fixed [-1], self.cnt) def next (self, filename=None, counter=None): + """ + Prepare for encrypting the next incoming object. Update the counter + and put together the IV, possibly changing prefixes. Then create the + new encryptor. + + The argument ``counter`` can be used to specify a file counter for this + object. Unless it is one of the reserved values, the counter of + subsequent objects will be computed from this one. + + If this is the first object in a series, ``filename`` is required, + otherwise it is reused if not present. The value is used to derive a + header sized placeholder to use until after encryption when all the + inputs to construct the final header are available. This is then + matched in ``.done()`` against the value found at the position of the + header. The motivation for this extra check is primarily to assist + format debugging: It makes stray headers easy to spot in malformed + PDTCRYPT files. + """ if filename is None: if self.lastinfo is None: raise InvalidParameter ("next: filename is mandatory for " @@ -717,6 +920,15 @@ class Encrypt (Crypto): def done (self, cmpdata): + """ + Complete encryption of an object. After this has been called, attempts + of encrypting further data will cause an error until ``.next()`` is + invoked properly. 
+ + Returns a 64 bytes buffer containing the object header including all + values including the “late” ones e. g. the ciphertext size and the + GCM tag. + """ if isinstance (cmpdata, bytes) is False: raise InvalidParameter ("done: comparison input expected as bytes, " "not %s" % type (cmpdata)) @@ -739,6 +951,15 @@ class Encrypt (Crypto): def process (self, buf): + """ + Encrypt a chunk of plaintext with the active encryptor. Returns the + size of the input consumed. This **must** be checked downstream. If the + maximum possible object size has been reached, the current context must + be finalized and a new one established before any further data can be + encrypted. The second argument is the remainder of the plaintext that + was not encrypted for the caller to use immediately after the new + context is ready. + """ if isinstance (buf, bytes) is False: raise InvalidParameter ("process: expected byte buffer, not %s" % type (buf)) @@ -761,6 +982,21 @@ class Decrypt (Crypto): def __init__ (self, password=None, key=None, counter=None, fixedparts=None, strict_ivs=False): + """ + Sanitizing ctor for the decryption context. ``fixedparts`` specifies a + list of IV fixed parts accepted during decryption. If a fixed part is + encountered that is not in the list, decryption will fail. + + :param password: mutually exclusive with ``key`` + :type password: bytes + :param key: mutually exclusive with ``password`` + :type key: bytes + :type counter: initial object counter the values + ``AES_GCM_IV_CNT_INFOFILE`` and + ``AES_GCM_IV_CNT_INDEX`` are unique in each backup set + and cannot be reused even with different fixed parts. + :type fixedparts: bytes list + """ if password is None and key is None \ or password is not None and key is not None : raise InvalidParameter ("__init__: need either key or password") @@ -789,6 +1025,9 @@ class Decrypt (Crypto): def valid_fixed_part (self, iv): + """ + Check if a fixed part was already seen. 
+ """ # check if fixed part is known fixed, _cnt = struct.unpack (FMT_I2N_IV, iv) i = bisect.bisect_left (self.fixed, fixed) @@ -796,6 +1035,12 @@ class Decrypt (Crypto): def check_consecutive_iv (self, iv): + """ + Check whether the counter part of the given IV is indeed the successor + of the currently present counter. This should always be the case for + the objects in a well formed PDT archive but should not be enforced + when decrypting out-of-order. + """ fixed, cnt = struct.unpack (FMT_I2N_IV, iv) if self.strict_ivs is True \ and self.last_iv is not None \ @@ -808,6 +1053,10 @@ class Decrypt (Crypto): def next (self, hdr): + """ + Start decrypting the next object. The PDTCRYPT header for the object + can be given either as already parsed object or as bytes. + """ if isinstance (hdr, bytes) is True: hdr = hdr_read (hdr) elif isinstance (hdr, dict) is False: @@ -851,6 +1100,12 @@ class Decrypt (Crypto): def done (self, tag=None): + """ + Stop decryption of the current object and finalize it with the active + context. This will throw an *InvalidGCMTag* exception to indicate that + the authentication tag does not match the data. If the tag is correct, + the rest of the plaintext is returned. + """ data = b"" try: if tag is None: @@ -871,6 +1126,9 @@ class Decrypt (Crypto): def process (self, buf): + """ + Decrypt the bytes object *buf* with the active decryptor. + """ if isinstance (buf, bytes) is False: raise InvalidParameter ("process: expected byte buffer, not %s" % type (buf)) -- 1.7.1