Intra2net 2017
===============================================================================
- crypto -- Encryption Layer for the Intra2net Backup
+ crypto -- Encryption Layer for the Deltatar Backup
===============================================================================
Crypto stack:
Errors fall into roughly three categories:
- - Cryptographical errors or data validity.
+ - Cryptographical errors or invalid data.
- ``InvalidGCMTag`` (decryption failed on account of an invalid GCM
tag),
- ``InvalidIVFixedPart`` (IV fixed part of object not found in list),
- ``DuplicateIV`` (the IV of an encrypted object already occurred),
- - ``DecryptionError`` (used in CLI decryption).
+ - ``DecryptionError`` (used in CLI decryption for presenting error
+ conditions to the user).
- Incorrect usage of the library.
-------------------------------------------------------------------------------
Initialization vectors are checked for reuse during the lifetime of a decryptor.
+The fixed counters for metadata files cannot be reused and attempts to do so
+will cause a DuplicateIV error. This means the length of objects encrypted with
+a metadata counter is capped at 63 GB.
+
+For ordinary, non-metadata payload, there is an optional mode with strict IV
+checking that causes a crypto context to fail if an IV encountered or created
+was already used for decrypting or encrypting, respectively, an earlier object.
+Note that this mode can trigger false positives when decrypting non-linearly,
+e. g. when traversing the same object multiple times. Since the crypto context
+has no notion of a position in a PDT encrypted archive, this condition must be
+sorted out downstream.
+
+Command Line Utility
+-------------------------------------------------------------------------------
+
+``crypto.py`` may be invoked as a script for decrypting, validating, and
+splitting PDT encrypted files. Consult the usage message for details.
+
+Usage examples:
+
+Decrypt from stdin using the password ‘foo’: ::
+
+ $ crypto.py process foo -i - -o - <some-file.tar.gz.pdtcrypt >some-file.tar.gz
+
+Output verbose information about the encrypted objects in the archive: ::
+
+ $ crypto.py process foo -v -i some-file.tar.gz.pdtcrypt -o /dev/null
+ PDT: decrypt from some-file.tar.gz.pdtcrypt
+ PDT: decrypt to /dev/null
+ PDT: source: file some-file.tar.gz.pdtcrypt
+ PDT: sink: file /dev/null
+ PDT: 0 hdr
+ PDT: · version = 1 : 0100
+ PDT: · paramversion = 1 : 0100
+ PDT: · nacl : d270 b031 00d1 87e2 c946 610d 7b7f 7e5f
+ PDT: · iv : 02ee 3dd7 a963 1eb1 0100 0000
+ PDT: · ctsize = 591 : 4f02 0000 0000 0000
+ PDT: · tag : 5b2d 6d8b 8f82 4842 12fd 0b10 b6e3 369b
+ PDT: 64 decrypt obj no. 1, 591 B
+ PDT: · [64] 0% done, read block (591 B of 591 B remaining)
+ PDT: · decrypt ciphertext 591 B
+ PDT: · decrypt plaintext 591 B
+ PDT: 655 finalize
+ …
+
+Also, the mode *scrypt* allows deriving encryption keys. To calculate the
+encryption key from the password ‘foo’ and the salt of the first object in a
+PDT encrypted file: ::
+
+ $ crypto.py scrypt foo -i some-file.pdtcrypt
+ {"scrypt_params": {"r": 8, "dkLen": 16, "p": 1, "N": 65536}, "hash": "b'c2941dfc6e3e65e8e887f1702b1091a3'", "salt": "b'd270b03100d187e2c946610d7b7f7e5f'"}
+
+The computed 16 byte key is given in hexadecimal notation in the value to
+``hash`` and can be fed into Python’s ``binascii.unhexlify()`` to obtain the
+corresponding binary representation.
+
+Note that in Scrypt hashing mode, no data integrity checks are being performed.
+If the wrong password is given, a wrong key will be derived. Whether the password
+was indeed correct can only be determined by decrypting. Note that since PDT
+archives essentially consist of a stream of independent objects, the salt and
+other parameters may change. Thus a key derived using the above method from the
+first object doesn’t necessarily apply to any of the subsequent objects.
"""
# , paramversion : u16
# , nacl : [u8; 16]
# , iv : [u8; 12]
-# , ctsize : usize }
-#
-# tag : [u8; 16]
+# , ctsize : usize
+# , tag : [u8; 16] }
#
# fn hdr_read (f : handle) -> hdrinfo;
# fn hdr_make (f : handle, h : hdrinfo) -> IOResult<usize>;
#
def hdr_read (data):
+ """
+ Read bytes as header structure.
+
+ If the input could not be interpreted as a header, fail with
+ ``InvalidHeader``.
+ """
try:
mag, version, paramversion, nacl, iv, ctsize, tag = \
def hdr_read_stream (instr):
+ """
+ Read header from stream at the current position.
+
+ Fail with ``InvalidHeader`` if insufficient bytes were read from the
+ stream, or if the content could not be interpreted as a header.
+ """
data = instr.read(PDTCRYPT_HDR_SIZE)
ldata = len (data)
if ldata == 0:
def hdr_from_params (version, paramversion, nacl, iv, ctsize, tag):
+ """
+ Assemble the necessary values into a PDTCRYPT header.
+
+ :type version: int to fit uint16_t
+ :type paramversion: int to fit uint16_t
+ :type nacl: bytes to fit uint8_t[16]
+ :type iv: bytes to fit uint8_t[12]
+ :type ctsize: int to fit uint64_t
+ :type tag: bytes to fit uint8_t[16]
+ """
buf = bytearray (PDTCRYPT_HDR_SIZE)
bufv = memoryview (buf)
def hdr_make (hdr):
+ """
+ Assemble a header from the given header structure.
+ """
return hdr_from_params (version=hdr.get("version"),
paramversion=hdr.get("paramversion"),
nacl=hdr.get("nacl"), iv=hdr.get("iv"),
" iv: %s[%d], ctsize: %d, tag: %s[%d] }"
def hdr_fmt (h):
+ """Format a header structure into readable output."""
return HDR_FMT % (h["version"], h["paramversion"],
binascii.hexlify (h["nacl"]), len(h["nacl"]),
binascii.hexlify (h["iv"]), len(h["iv"]),
def hex_spaced_of_bytes (b):
+ """Format bytes object, hexdump style."""
return " ".join ([ "%.2x%.2x" % (c1, c2)
for c1, c2 in zip (b[0::2], b[1::2]) ]) \
+ (len (b) | 1 == len (b) and " %.2x" % b[-1] or "") # odd lengths
"""
def hdr_fmt_pretty (h):
+ """
+ Format header structure into multi-line representation of its contents and
+ their raw representation. (Omit the implicit “PDTCRYPT” magic bytes that
+ precede every header.)
+ """
return HDR_FMT_PRETTY \
% (h["version"],
hex_spaced_of_bytes (struct.pack (FMT_UINT16_LE, h["version"])),
IV_FMT = "((f %s) (c %d))"
def iv_fmt (iv):
+ """Format the two components of an IV in a readable fashion."""
fixed, cnt = struct.unpack (FMT_I2N_IV, iv)
return IV_FMT % (binascii.hexlify (fixed), cnt)
def kdf_dummy (klen, password, _nacl):
+ """
+ Fake KDF for testing purposes that is called when parameter version zero is
+ encountered.
+ """
q, r = divmod (klen, len (password))
if isinstance (password, bytes) is False:
password = password.encode ()
def kdf_scrypt (params, password, nacl):
+ """
+ Wrapper for the Scrypt KDF, corresponds to parameter version one. The
+ computation result is memoized based on the inputs to facilitate spawning
+ multiple encryption contexts.
+ """
N = params["N"]
r = params["r"]
p = params["p"]
def kdf_by_version (paramversion=None, defs=None):
+ """
+ Pick the KDF handler corresponding to the parameter version or the
+ definition set.
+
+ :rtype: function (password : str, nacl : str) -> str
+ """
if paramversion is not None:
defs = ENCRYPTION_PARAMETERS.get(paramversion, None)
if defs is None:
def scrypt_hashfile (pw, fname):
+ """
+ Calculate the SCRYPT hash from the password and the information contained
+ in the first header found in the given file. The header is read only at
+ offset zero.
+ """
with deptdcrypt_mk_stream (PDTCRYPT_SOURCE, fname or "-") as ins:
hsh, _void, _void = scrypt_hashsource (pw, ins)
return hsh
self.set_parameters (*al, **akv)
- def next_pfx (self):
+ def next_fixed (self):
pass
def set_object_counter (self, cnt=None):
+ """
+ Safely set the internal counter of encrypted objects. Numerous
+ constraints apply:
+
+ The same counter may not be reused in combination with one IV fixed
+ part. This is validated elsewhere in the IV handling.
+
+ Counter zero is invalid. The first two counters are reserved for
+ metadata. The implementation does not allow for splitting metadata
+ files over multiple encrypted objects. (This would be possible by
+ assigning new fixed parts.) Thus in a Deltatar backup there is at most
+ one object with counter value one and at most one with counter value
+ two. On creation of a
+ context, the initial counter may be chosen. The globals
+ ``AES_GCM_IV_CNT_INFOFILE`` and ``AES_GCM_IV_CNT_INDEX`` can be used to
+ request one of the reserved values. If one of these values has been
+ used, any further attempt of setting the counter to that value will
+ be rejected with an ``InvalidFileCounter`` exception.
+
+ Out of bounds values (i. e. below one or above the maximum of 2³²)
+ cause an ``InvalidParameter`` exception to be thrown.
+ """
if cnt is None:
self.cnt = AES_GCM_IV_CNT_DATA
return
return
# cnt == AES_GCM_IV_CNT_MAX + 1 → wrap
self.cnt = AES_GCM_IV_CNT_DATA
- self.next_pfx ()
+ self.next_fixed ()
def set_parameters (self, password=None, key=None, paramversion=None,
- nacl=None, counter=None, nextpfx=None,
+ nacl=None, counter=None, next_fixed=None,
strict_ivs=False):
- if nextpfx is not None:
- self.next_pfx = nextpfx
- self.next_pfx ()
+ """
+ Configure the internal state of a crypto context. Not intended for
+ external use.
+ """
+ if next_fixed is not None:
+ self.next_fixed = next_fixed
+ self.next_fixed ()
self.set_object_counter (counter)
self.strict_ivs = strict_ivs
def process (self, buf):
+ """
+ Encrypt / decrypt a buffer. Invokes the ``.update()`` method on the
+ wrapped encryptor or decryptor, respectively.
+
+ The Cryptography exception ``AlreadyFinalized`` is translated to an
+ ``InternalError`` at this point. It may occur in sound code when the GC
+ closes an encrypting stream after an error. Everywhere else it must be
+ treated as a bug.
+ """
if self.enc is None:
raise RuntimeError ("process: context not initialized")
self.stats ["in"] += len (buf)
def next (self, password, paramversion, nacl, iv):
+ """
+ Prepare for encrypting another object: Reset the data counters and
+ change the configuration in case one of the variable parameters differs
+ from the last object. Also check the IV for duplicates and error out
+ if strict checking was requested.
+ """
self.ctsize = 0
self.ptsize = 0
self.stats ["obj"] += 1
def check_duplicate_iv (self, iv):
+ """
+ Add an IV (the 12 byte representation as in the header) to the list. With
+ strict checking enabled, this will throw a ``DuplicateIV`` if the IV was
+ already used. Depending on the context, this may indicate a serious error
+ (IV reuse).
+ """
if self.strict_ivs is True and iv in self.used_ivs:
raise DuplicateIV ("iv %s was reused" % iv_fmt (iv))
# iv has not been used before; add to collection
def counters (self):
+ """
+ Access the data counters.
+ """
return self.stats ["obj"], self.stats ["in"], self.stats ["out"]
def __init__ (self, version, paramversion, password=None, key=None, nacl=None,
counter=AES_GCM_IV_CNT_DATA, strict_ivs=True):
+ """
+ The ctor will throw immediately if one of the parameters does not conform
+ to our expectations.
+
+ :type version: int to fit uint16_t
+ :type paramversion: int to fit uint16_t
+ :param password: mutually exclusive with ``key``
+ :type password: bytes
+ :param key: mutually exclusive with ``password``
+ :type key: bytes
+ :type nacl: bytes
+ :param counter: initial object counter; the values
+ ``AES_GCM_IV_CNT_INFOFILE`` and
+ ``AES_GCM_IV_CNT_INDEX`` are unique in each backup set
+ and cannot be reused even with different fixed parts.
+ :type strict_ivs: bool
+ """
if password is None and key is None \
or password is not None and key is not None :
raise InvalidParameter ("__init__: need either key or password")
self.paramenc = ENCRYPTION_PARAMETERS.get (paramversion) ["enc"]
super().__init__ (password, key, paramversion, nacl, counter=counter,
- nextpfx=lambda: self.fixed.append (os.urandom(8)),
+ next_fixed=lambda: self.fixed.append (os.urandom(8)),
strict_ivs=strict_ivs)
def iv_make (self):
+ """
+ Construct a 12-bytes IV from the current fixed part and the object
+ counter.
+ """
return struct.pack(FMT_I2N_IV, self.fixed [-1], self.cnt)
def next (self, filename=None, counter=None):
+ """
+ Prepare for encrypting the next incoming object. Update the counter
+ and put together the IV, possibly changing the fixed part. Then create the
+ new encryptor.
+
+ The argument ``counter`` can be used to specify a file counter for this
+ object. Unless it is one of the reserved values, the counter of
+ subsequent objects will be computed from this one.
+
+ If this is the first object in a series, ``filename`` is required,
+ otherwise it is reused if not present. The value is used to derive a
+ header sized placeholder to use until after encryption when all the
+ inputs to construct the final header are available. This is then
+ matched in ``.done()`` against the value found at the position of the
+ header. The motivation for this extra check is primarily to assist
+ format debugging: It makes stray headers easy to spot in malformed
+ PDTCRYPT files.
+ """
if filename is None:
if self.lastinfo is None:
raise InvalidParameter ("next: filename is mandatory for "
def done (self, cmpdata):
+ """
+ Complete encryption of an object. After this has been called, attempts
+ of encrypting further data will cause an error until ``.next()`` is
+ invoked properly.
+
+ Returns a 64 byte buffer containing the complete object header,
+ including the “late” values, e. g. the ciphertext size and the GCM
+ tag.
+ """
if isinstance (cmpdata, bytes) is False:
raise InvalidParameter ("done: comparison input expected as bytes, "
"not %s" % type (cmpdata))
def process (self, buf):
+ """
+ Encrypt a chunk of plaintext with the active encryptor. Returns the
+ size of the input consumed. This **must** be checked downstream. If the
+ maximum possible object size has been reached, the current context must
+ be finalized and a new one established before any further data can be
+ encrypted. The second argument is the remainder of the plaintext that
+ was not encrypted for the caller to use immediately after the new
+ context is ready.
+ """
if isinstance (buf, bytes) is False:
raise InvalidParameter ("process: expected byte buffer, not %s"
% type (buf))
def __init__ (self, password=None, key=None, counter=None, fixedparts=None,
strict_ivs=False):
+ """
+ Sanitizing ctor for the decryption context. ``fixedparts`` specifies a
+ list of IV fixed parts accepted during decryption. If a fixed part is
+ encountered that is not in the list, decryption will fail.
+
+ :param password: mutually exclusive with ``key``
+ :type password: bytes
+ :param key: mutually exclusive with ``password``
+ :type key: bytes
+ :param counter: initial object counter; the values
+ ``AES_GCM_IV_CNT_INFOFILE`` and
+ ``AES_GCM_IV_CNT_INDEX`` are unique in each backup set
+ and cannot be reused even with different fixed parts.
+ :type fixedparts: bytes list
+ """
if password is None and key is None \
or password is not None and key is not None :
raise InvalidParameter ("__init__: need either key or password")
def valid_fixed_part (self, iv):
+ """
+ Check if a fixed part was already seen.
+ """
# check if fixed part is known
fixed, _cnt = struct.unpack (FMT_I2N_IV, iv)
i = bisect.bisect_left (self.fixed, fixed)
def check_consecutive_iv (self, iv):
+ """
+ Check whether the counter part of the given IV is indeed the successor
+ of the currently present counter. This should always be the case for
+ the objects in a well formed PDT archive but should not be enforced
+ when decrypting out-of-order.
+ """
fixed, cnt = struct.unpack (FMT_I2N_IV, iv)
if self.strict_ivs is True \
and self.last_iv is not None \
def next (self, hdr):
+ """
+ Start decrypting the next object. The PDTCRYPT header for the object
+ can be given either as already parsed object or as bytes.
+ """
if isinstance (hdr, bytes) is True:
hdr = hdr_read (hdr)
elif isinstance (hdr, dict) is False:
def done (self, tag=None):
+ """
+ Stop decryption of the current object and finalize it with the active
+ context. This will throw an *InvalidGCMTag* exception to indicate that
+ the authentication tag does not match the data. If the tag is correct,
+ the rest of the plaintext is returned.
+ """
data = b""
try:
if tag is None:
def process (self, buf):
+ """
+ Decrypt the bytes object *buf* with the active decryptor.
+ """
if isinstance (buf, bytes) is False:
raise InvalidParameter ("process: expected byte buffer, not %s"
% type (buf))