6 ===============================================================================
7 crypto -- Encryption Layer for the Deltatar Backup
8 ===============================================================================
12 - AES-GCM for the symmetric encryption;
17 - NIST Recommendation for Block Cipher Modes of Operation: Galois/Counter
19 http://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf
22 https://cryptome.org/2014/01/aes-gcm-v1.pdf
24 - Authentication weaknesses in GCM
25 http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/comments/CWC-GCM/Ferguson2.pdf
28 -------------------------------------------------------------------------------
30 Errors fall into roughly three categories:
32 - Cryptographical errors or invalid data.
34 - ``InvalidGCMTag`` (decryption failed on account of an invalid GCM
36 - ``InvalidIVFixedPart`` (IV fixed part of object not found in list),
37 - ``DuplicateIV`` (the IV of an encrypted object already occurred),
38 - ``DecryptionError`` (used in CLI decryption for presenting error
39 conditions to the user).
41 - Incorrect usage of the library.
43 - ``InvalidParameter`` (non-conforming user supplied parameter),
44 - ``InvalidHeader`` (data passed for reading not parsable into header),
45 - ``FormatError`` (cannot handle header or parameter version),
48 - Bad internal state. If one of these is encountered it means that a state
49 was reached that shouldn’t occur during normal processing.
54 Also, ``EndOfFile`` is used as a sentinel to communicate that a stream supplied
55 for reading is exhausted.
57 Initialization Vectors
58 -------------------------------------------------------------------------------
60 Initialization vectors are checked for reuse during the lifetime of a decryptor.
61 The fixed counters for metadata files cannot be reused and attempts to do so
62 will cause a DuplicateIV error. This means the length of objects encrypted with
63 a metadata counter is capped at 63 GB.
65 For ordinary, non-metadata payload, there is an optional mode with strict IV
66 checking that causes a crypto context to fail if an IV encountered or created
67 was already used for decrypting or encrypting, respectively, an earlier object.
68 Note that this mode can trigger false positives when decrypting non-linearly,
69 e. g. when traversing the same object multiple times. Since the crypto context
70 has no notion of a position in a PDT encrypted archive, this condition must be
71 sorted out downstream.
74 -------------------------------------------------------------------------------
76 ``crypto.py`` may be invoked as a script for decrypting, validating, and
77 splitting PDT encrypted files. Consult the usage message for details.
81 Decrypt from stdin using the password ‘foo’: ::
83 $ crypto.py process foo -i - -o - <some-file.tar.gz.pdtcrypt >some-file.tar.gz
85 Output verbose information about the encrypted objects in the archive: ::
87 $ crypto.py process foo -v -i some-file.tar.gz.pdtcrypt -o /dev/null
88 PDT: decrypt from some-file.tar.gz.pdtcrypt
89 PDT: decrypt to /dev/null
90 PDT: source: file some-file.tar.gz.pdtcrypt
91 PDT: sink: file /dev/null
93 PDT: · version = 1 : 0100
94 PDT: · paramversion = 1 : 0100
95 PDT: · nacl : d270 b031 00d1 87e2 c946 610d 7b7f 7e5f
96 PDT: · iv : 02ee 3dd7 a963 1eb1 0100 0000
97 PDT: · ctsize = 591 : 4f02 0000 0000 0000
98 PDT: · tag : 5b2d 6d8b 8f82 4842 12fd 0b10 b6e3 369b
99 PDT: 64 decrypt obj no. 1, 591 B
100 PDT: · [64] 0% done, read block (591 B of 591 B remaining)
101 PDT: · decrypt ciphertext 591 B
102 PDT: · decrypt plaintext 591 B
106 Also, the mode *scrypt* allows deriving encryption keys. To calculate the
107 encryption key from the password ‘foo’ and the salt of the first object in a
108 PDT encrypted file: ::
110 $ crypto.py scrypt foo -i some-file.pdtcrypt
111 {"paramversion": 1, "salt": "Cqzbk48e3peEjzWto8D0yA==", "key": "JH9EkMwaM4x9F5aim5gK/Q=="}
The computed 16 byte key is given Base64 encoded in the value to ``key`` (as
is the salt in the value to ``salt``) and can be fed into Python’s
``base64.b64decode()`` to obtain the corresponding binary representation.
117 Note that in Scrypt hashing mode, no data integrity checks are being performed.
118 If the wrong password is given, a wrong key will be derived. Whether the password
119 was indeed correct can only be determined by decrypting. Note that since PDT
120 archives essentially consist of a stream of independent objects, the salt and
121 other parameters may change. Thus a key derived using above method from the
122 first object doesn’t necessarily apply to any of the subsequent objects.
131 from functools import reduce, partial
142 except ImportError as exn:
145 if __name__ == "__main__": ## Work around the import mechanism lest Python’s
146 pwd = os.getcwd() ## preference for local imports causes a cyclical
147 ## import (crypto → pylibscrypt → […] → ./tarfile → crypto).
148 sys.path = [ p for p in sys.path if p.find ("deltatar") < 0 ]
151 from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
152 from cryptography.hazmat.backends import default_backend
156 __all__ = [ "hdr_make", "hdr_read", "hdr_fmt", "hdr_fmt_pretty"
158 , "PDTCRYPT_HDR_SIZE", "AES_GCM_IV_CNT_DATA"
159 , "AES_GCM_IV_CNT_INFOFILE", "AES_GCM_IV_CNT_INDEX"
163 ###############################################################################
165 ###############################################################################
167 class EndOfFile (Exception):
171 def __init__ (self, n=None, msg=None):
class InvalidParameter (Exception):
    """
    Inputs not valid for PDT encryption: a user supplied parameter does not
    conform to what the library expects (wrong type, value out of bounds,
    missing or mutually exclusive arguments).
    """
class InvalidHeader (Exception):
    """
    Header not valid: the data passed for reading could not be parsed into a
    PDTCRYPT header, e. g. because of a short read, a failure to unpack the
    fields, or a bad magic.
    """
187 class InvalidGCMTag (Exception):
189 The GCM tag calculated during decryption differs from that in the object
class InvalidIVFixedPart (Exception):
    """
    IV fixed part not in supplied list: either the backup is corrupt or the
    current object does not belong to it.
    """
class IVFixedPartError (Exception):
    """
    Error creating a unique IV fixed part: repeated calls to the system RNG
    kept yielding a sequence of bytes that the context already used as a
    fixed part. Raised by ``Encrypt.next_fixed()`` once its retries are
    exhausted.
    """
class InvalidFileCounter (Exception):
    """
    When encrypting, an attempted reuse of a dedicated counter (info file,
    index file) was caught. Raised by ``Crypto.set_object_counter()`` when
    one of the reserved counter values is requested a second time.
    """
class DuplicateIV (Exception):
    """
    With strict IV checking enabled, the current IV is identical to one the
    context has already recorded (same fixed part and file counter). This
    indicates tampering or programmer error and cannot be recovered from.
    """
class NonConsecutiveIV (Exception):
    """
    IVs are not numbered consecutively. This is a hard error with strict IV
    checking; it precludes random access to the encrypted objects.
    """
class CiphertextTooLong (Exception):
    """
    An attempt was made to decrypt more data than the ciphertext size
    declared in the object header.
    """
class FormatError (Exception):
    """
    Unusable parameters in header: the header version or the parameter
    version cannot be handled.
    """
class DecryptionError (Exception):
    """
    Error during decryption with ``crypto.py`` on the command line; used for
    presenting error conditions to the user.
    """
254 class Unreachable (Exception):
256 Makeshift __builtin_unreachable(); always a programmer error if
class InternalError (Exception):
    """
    Errors not ascribable to bad user inputs or cryptography. For instance,
    ``Crypto.process()`` translates the Cryptography exception
    ``AlreadyFinalized`` into this one.
    """
267 ###############################################################################
268 ## crypto layer version
269 ###############################################################################
271 ENCRYPTION_PARAMETERS = \
273 { "kdf": ("dummy", 16)
274 , "enc": "passthrough" }
282 , "enc": "aes-gcm" } }
284 ###############################################################################
286 ###############################################################################
# magic bytes that open every object header
PDTCRYPT_HDR_MAGIC = b"PDTCRYPT"

# Sizes of the header fields in bytes, in the order they appear in the header.
# The trailing comment gives the running total, i. e. the offset of the end of
# the respective field.
PDTCRYPT_HDR_SIZE_MAGIC = 8 # 8
PDTCRYPT_HDR_SIZE_VERSION = 2 # 10
PDTCRYPT_HDR_SIZE_PARAMVERSION = 2 # 12
PDTCRYPT_HDR_SIZE_NACL = 16 # 28
PDTCRYPT_HDR_SIZE_IV = 12 # 40
PDTCRYPT_HDR_SIZE_CTSIZE = 8 # 48
PDTCRYPT_HDR_SIZE_TAG = 16 # 64 GCM auth tag

# total size of a header: the sum of all field sizes
PDTCRYPT_HDR_SIZE = PDTCRYPT_HDR_SIZE_MAGIC + PDTCRYPT_HDR_SIZE_VERSION \
                  + PDTCRYPT_HDR_SIZE_PARAMVERSION + PDTCRYPT_HDR_SIZE_NACL \
                  + PDTCRYPT_HDR_SIZE_IV + PDTCRYPT_HDR_SIZE_CTSIZE \
                  + PDTCRYPT_HDR_SIZE_TAG # = 64

# precalculate offsets since Python can’t do constant folding over names;
# each offset is that of the preceding field plus the size of that field
HDR_OFF_VERSION = PDTCRYPT_HDR_SIZE_MAGIC # = 8
HDR_OFF_PARAMVERSION = HDR_OFF_VERSION + PDTCRYPT_HDR_SIZE_VERSION # = 10
HDR_OFF_NACL = HDR_OFF_PARAMVERSION + PDTCRYPT_HDR_SIZE_PARAMVERSION # = 12
HDR_OFF_IV = HDR_OFF_NACL + PDTCRYPT_HDR_SIZE_NACL # = 28
HDR_OFF_CTSIZE = HDR_OFF_IV + PDTCRYPT_HDR_SIZE_IV # = 40
HDR_OFF_TAG = HDR_OFF_CTSIZE + PDTCRYPT_HDR_SIZE_CTSIZE # = 48
313 FMT_I2N_IV = "<8sL" # 8 random bytes ‖ 32 bit counter
FMT_I2N_HDR = ("<" # little-endian byte order, standard sizes
318 "16s" # sodium chloride
AES_KEY_SIZE = 16 # bytes; e. g. b"0123456789abcdef"
AES_KEY_SIZE_B64 = 24 # length of such a key Base64 encoded: b'MDEyMzQ1Njc4OWFiY2RlZg=='

# upper bound in bytes for data encrypted under one IV
AES_GCM_MAX_SIZE = (1 << 36) - (1 << 5) # (2^39 - 2^8) bits ≅ 64 GB.
# Source: NIST SP 800-38D section 5.2.1.1
# https://crypto.stackexchange.com/questions/31793/plain-text-size-limits-for-aes-gcm-mode-just-64gb

# default cap on the size of a single object; stays below AES_GCM_MAX_SIZE
PDTCRYPT_MAX_OBJ_SIZE_DEFAULT = 63 * (1 << 30) # 63 GB
PDTCRYPT_MAX_OBJ_SIZE = PDTCRYPT_MAX_OBJ_SIZE_DEFAULT

# index and info files are written on the fly while encrypting so their
# counters must be available in advance
AES_GCM_IV_CNT_INFOFILE = 1 # constant
AES_GCM_IV_CNT_INDEX = AES_GCM_IV_CNT_INFOFILE + 1 # = 2
AES_GCM_IV_CNT_DATA = AES_GCM_IV_CNT_INDEX + 1 # = 3; also for multivolume
AES_GCM_IV_CNT_MAX_DEFAULT = 0xffFFffFF # largest value of a 32 bit counter
AES_GCM_IV_CNT_MAX = AES_GCM_IV_CNT_MAX_DEFAULT

# IV structure and generation
PDTCRYPT_IV_GEN_MAX_RETRIES = 10 # ×
PDTCRYPT_IV_FIXEDPART_SIZE = 8 # B
PDTCRYPT_IV_COUNTER_SIZE = 4 # B

# secret type: PW of string | KEY of char [16]
# (the first element of the pair returned by ``make_secret()``)
PDTCRYPT_SECRET_PW = 0
PDTCRYPT_SECRET_KEY = 1
351 ###############################################################################
353 ###############################################################################
359 # , paramversion : u16
365 # fn hdr_read (f : handle) -> hdrinfo;
366 # fn hdr_make (f : handle, h : hdrinfo) -> IOResult<usize>;
367 # fn hdr_fmt (h : hdrinfo) -> String;
372 Read bytes as header structure.
374 If the input could not be interpreted as a header, fail with
379 mag, version, paramversion, nacl, iv, ctsize, tag = \
380 struct.unpack (FMT_I2N_HDR, data)
381 except Exception as exn:
382 raise InvalidHeader ("error unpacking header from [%r]: %s"
383 % (binascii.hexlify (data), str (exn)))
385 if mag != PDTCRYPT_HDR_MAGIC:
386 raise InvalidHeader ("bad magic in header: expected [%s], got [%s]"
387 % (PDTCRYPT_HDR_MAGIC, mag))
390 { "version" : version
391 , "paramversion" : paramversion
399 def hdr_read_stream (instr):
401 Read header from stream at the current position.
403 Fail with ``InvalidHeader`` if insufficient bytes were read from the
404 stream, or if the content could not be interpreted as a header.
406 data = instr.read(PDTCRYPT_HDR_SIZE)
410 elif ldata != PDTCRYPT_HDR_SIZE:
411 raise InvalidHeader ("hdr_read_stream: expected %d B, received %d B"
412 % (PDTCRYPT_HDR_SIZE, ldata))
413 return hdr_read (data)
416 def hdr_from_params (version, paramversion, nacl, iv, ctsize, tag):
418 Assemble the necessary values into a PDTCRYPT header.
420 :type version: int to fit uint16_t
421 :type paramversion: int to fit uint16_t
422 :type nacl: bytes to fit uint8_t[16]
423 :type iv: bytes to fit uint8_t[12]
    :type       ctsize: int to fit uint64_t
425 :type tag: bytes to fit uint8_t[16]
427 buf = bytearray (PDTCRYPT_HDR_SIZE)
428 bufv = memoryview (buf)
431 struct.pack_into (FMT_I2N_HDR, bufv, 0,
433 version, paramversion, nacl, iv, ctsize, tag)
434 except Exception as exn:
435 return False, "error assembling header: %s" % str (exn)
437 return True, bytes (buf)
def hdr_make_dummy (s):
    """
    Build a header sized placeholder in which every byte holds the same
    value. That value is computed from the string *s* as the sum of the code
    points of its characters, modulo 255.

    The placeholder is used to verify that we’ve jumped back correctly to the
    actual position of the object header.
    """
    fill = sum (map (ord, s)) % 0xFF
    return bytes ([fill]) * PDTCRYPT_HDR_SIZE
452 Assemble a header from the given header structure.
454 return hdr_from_params (version=hdr.get("version"),
455 paramversion=hdr.get("paramversion"),
456 nacl=hdr.get("nacl"), iv=hdr.get("iv"),
457 ctsize=hdr.get("ctsize"), tag=hdr.get("tag"))
460 HDR_FMT = "I2n_header { version: %d, paramversion: %d, nacl: %s[%d]," \
461 " iv: %s[%d], ctsize: %d, tag: %s[%d] }"
464 """Format a header structure into readable output."""
465 return HDR_FMT % (h["version"], h["paramversion"],
466 binascii.hexlify (h["nacl"]), len(h["nacl"]),
467 binascii.hexlify (h["iv"]), len(h["iv"]),
469 binascii.hexlify (h["tag"]), len(h["tag"]))
def hex_spaced_of_bytes (b):
    """
    Format a bytes object hexdump style: two lowercase hex digits per byte,
    arranged in groups of two bytes that are separated by a single space.

    If the input has an odd length, the last group holds only the final byte.
    No separator precedes the first group, so a single byte ``b"\\x01"``
    formats as ``"01"`` and empty input as the empty string.

    :type  b: bytes (or another sliceable sequence of ints)
    :rtype:   str
    """
    # Slicing in steps of two yields the trailing odd byte as a short group of
    # its own, so odd lengths need no special treatment.
    return " ".join ("".join ("%.2x" % c for c in b [i : i + 2])
                     for i in range (0, len (b), 2))
479 def hdr_iv_counter (h):
480 """Extract the variable part of the IV of the given header."""
481 _fixed, cnt = struct.unpack (FMT_I2N_IV, h ["iv"])
485 def hdr_iv_fixed (h):
486 """Extract the fixed part of the IV of the given header."""
487 fixed, _cnt = struct.unpack (FMT_I2N_IV, h ["iv"])
491 hdr_dump = hex_spaced_of_bytes
495 """version = %-4d : %s
496 paramversion = %-4d : %s
503 def hdr_fmt_pretty (h):
505 Format header structure into multi-line representation of its contents and
506 their raw representation. (Omit the implicit “PDTCRYPT” magic bytes that
507 precede every header.)
509 return HDR_FMT_PRETTY \
511 hex_spaced_of_bytes (struct.pack (FMT_UINT16_LE, h["version"])),
513 hex_spaced_of_bytes (struct.pack (FMT_UINT16_LE, h["paramversion"])),
514 hex_spaced_of_bytes (h["nacl"]),
515 hex_spaced_of_bytes (h["iv"]),
517 hex_spaced_of_bytes (struct.pack (FMT_UINT64_LE, h["ctsize"])),
518 hex_spaced_of_bytes (h["tag"]))
520 IV_FMT = "((f %s) (c %d))"
523 """Format the two components of an IV in a readable fashion."""
524 fixed, cnt = struct.unpack (FMT_I2N_IV, iv)
525 return IV_FMT % (binascii.hexlify (fixed), cnt)
528 ###############################################################################
530 ###############################################################################
532 class Location (object):
536 def restore_loc_fmt (loc):
538 % (loc.n, loc.offset)
540 def locate_hdr_candidates (fd):
542 Walk over instances of the magic string in the payload, collecting their
543 positions. If the offset of the first found instance is not zero, the file
    begins with leading garbage. Used by disaster recovery.
546 :return: The list of offsets in the file.
550 mm = mmap.mmap(fd, 0, mmap.MAP_SHARED, mmap.PROT_READ)
553 pos = mm.find (PDTCRYPT_HDR_MAGIC, pos)
562 HDR_CAND_GOOD = 0 # header marks begin of valid object
563 HDR_CAND_FISHY = 1 # inconclusive (tag mismatch, obj overlap etc.)
564 HDR_CAND_JUNK = 2 # not a header / object unreadable
567 { HDR_CAND_GOOD : "valid"
568 , HDR_CAND_FISHY : "fishy"
569 , HDR_CAND_JUNK : "junk"
def verdict_fmt (vdt):
    """
    Look up the printable name of a header candidate verdict (one of the
    ``HDR_CAND_*`` values) in ``HDR_VERDICT_NAME``.
    """
    label = HDR_VERDICT_NAME [vdt]
    return label
577 def inspect_hdr (fd, off):
579 Attempt to parse a header in *fd* at position *off*.
581 Returns a verdict about the quality of that header plus the parsed header
585 _ = os.lseek (fd, off, os.SEEK_SET)
587 if os.lseek (fd, 0, os.SEEK_CUR) != off:
588 if PDTCRYPT_VERBOSE is True:
589 noise ("PDT: %d → dismissed (lseek() past EOF)" % off)
590 return HDR_CAND_JUNK, None
592 raw = os.read (fd, PDTCRYPT_HDR_SIZE)
593 if len (raw) != PDTCRYPT_HDR_SIZE:
594 if PDTCRYPT_VERBOSE is True:
595 noise ("PDT: %d → dismissed (EOF inside header)" % off)
596 return HDR_CAND_JUNK, None
600 except InvalidHeader as exn:
601 if PDTCRYPT_VERBOSE is True:
602 noise ("PDT: %d → dismissed (invalid: [%s])" % (off, str (exn)))
603 return HDR_CAND_JUNK, None
605 obj0 = off + PDTCRYPT_HDR_SIZE
606 objX = obj0 + hdr ["ctsize"]
608 eof = os.lseek (fd, 0, os.SEEK_END)
610 if PDTCRYPT_VERBOSE is True:
611 noise ("PDT: %d → EOF inside object (%d≤%d≤%d); adjusting size to "
612 "%d" % (off, obj0, eof, objX, (eof - obj0)))
613 # try reading up to the end
614 hdr ["ctsize"] = eof - obj0
615 return HDR_CAND_FISHY, hdr
617 return HDR_CAND_GOOD, hdr
620 def try_decrypt (ifd, off, hdr, secret, ofd=-1):
622 Attempt to decrypt the object in the (seekable) descriptor *ifd* starting
623 at *off* using the metadata in *hdr* and *secret*. An output fd can be
624 specified with *ofd*; if it is *-1* – the default –, the decrypted payload
627 Always creates a fresh decryptor, so validation steps across objects don’t
    Errors during GCM tag validation are ignored. Used by disaster recovery.
632 ctleft = hdr ["ctsize"]
636 if ks == PDTCRYPT_SECRET_PW:
637 decr = Decrypt (password=secret [1])
638 elif ks == PDTCRYPT_SECRET_KEY:
640 decr = Decrypt (key=key)
647 os.lseek (ifd, pos, os.SEEK_SET)
650 cnksiz = min (ctleft, PDTCRYPT_BLOCKSIZE)
651 cnk = os.read (ifd, cnksiz)
654 pt = decr.process (cnk)
659 except InvalidGCMTag:
660 noise ("PDT: GCM tag mismatch for object %d–%d"
661 % (off, off + hdr ["ctsize"]))
662 if len (pt) > 0 and ofd != -1:
665 except Exception as exn:
666 noise ("PDT: error decrypting object %d–%d@%d, %d B remaining [%s]"
667 % (off, off + hdr ["ctsize"], pos, ctleft, exn))
673 def readable_objects_offsets (ifd, secret, cands):
675 From a list of candidates, locate the ones that mark the start of actual
676 readable PDTCRYPT objects.
680 for i, cand in enumerate (cands):
681 vdt, hdr = inspect_hdr (ifd, cand)
682 if vdt == HDR_CAND_JUNK:
683 pass # ignore unreadable ones
684 elif vdt in [HDR_CAND_GOOD, HDR_CAND_FISHY]:
685 ctsize = hdr ["ctsize"]
686 off0 = cand + PDTCRYPT_HDR_SIZE
687 ok = try_decrypt (ifd, off0, hdr, secret) == ctsize
689 good.append ((cand, off0 + ctsize))
691 overlap = find_overlaps (good)
693 return [ g [0] for g in good ]
696 def reconstruct_offsets (fname, secret):
697 ifd = os.open (fname, os.O_RDONLY)
700 cands = locate_hdr_candidates (ifd)
701 return readable_objects_offsets (ifd, secret, cands)
706 ###############################################################################
708 ###############################################################################
710 def make_secret (password=None, key=None):
712 Safely create a “secret” value that consists either of a key or a password.
713 Inputs are validated: the password is accepted as (UTF-8 encoded) bytes or
714 string; for the key only a bytes object of the proper size or a base64
715 encoded string thereof is accepted.
717 If both are provided, the key is preferred over the password; no checks are
718 performed whether the key is derived from the password.
720 :returns: secret value if inputs were acceptable | None otherwise.
723 if isinstance (key, str) is True:
724 key = key.encode ("utf-8")
725 if isinstance (key, bytes) is True:
726 if len (key) == AES_KEY_SIZE:
727 return (PDTCRYPT_SECRET_KEY, key)
728 if len (key) == AES_KEY_SIZE * 2:
730 key = binascii.unhexlify (key)
731 return (PDTCRYPT_SECRET_KEY, key)
732 except binascii.Error: # garbage in string
734 if len (key) == AES_KEY_SIZE_B64:
736 key = base64.b64decode (key)
737 # the base64 processor is very tolerant and allows for
738 # arbitrary trailing and leading data thus the data obtained
739 # must be checked for the proper length
740 if len (key) == AES_KEY_SIZE:
741 return (PDTCRYPT_SECRET_KEY, key)
742 except binascii.Error: # “incorrect padding”
744 elif password is not None:
745 if isinstance (password, str) is True:
746 return (PDTCRYPT_SECRET_PW, password)
747 elif isinstance (password, bytes) is True:
749 password = password.decode ("utf-8")
750 return (PDTCRYPT_SECRET_PW, password)
751 except UnicodeDecodeError:
757 ###############################################################################
758 ## passthrough / null encryption
759 ###############################################################################
class PassthroughCipher (object):
    """
    Null cipher: data handed to ``.update()`` is returned unmodified and
    finalizing yields no further output. It provides the methods
    ``update``, ``finalize`` and ``finalize_with_tag`` plus a ``tag``
    attribute consisting of sixteen zero bytes.
    """

    # two zeroed 64 bit integers stand in for the 16 byte authentication tag
    tag = struct.pack ("<QQ", 0, 0)

    def __init__ (self):
        """Nothing to set up."""
        pass

    def update (self, b):
        """Hand the buffer back as is."""
        return b

    def finalize (self):
        """No data is held back so there is nothing left to emit."""
        return b""

    def finalize_with_tag (self, _):
        """Like ``.finalize()``; the tag passed is disregarded."""
        return b""
773 ###############################################################################
774 ## convenience wrapper
775 ###############################################################################
def kdf_dummy (klen, password, _nacl):
    """
    Fake KDF for testing purposes that is called when parameter version zero
    is encountered. The “key” is simply the password repeated and truncated
    to the requested length; no salt is used or created.

    The password is encoded *before* its length is taken so the key always
    consists of exactly *klen* bytes, also if the password contains characters
    that encode to more than one byte.

    :param klen:        length in bytes of the key to create
    :param password:    bytes, or a str which is encoded with the default
                        codec of ``str.encode()``; must not be empty
    :param _nacl:       ignored
    :returns:           pair of the key and an empty salt ``b""``
    :raises ZeroDivisionError: if the password is empty
    """
    if isinstance (password, bytes) is False:
        password = password.encode ()
    # whole repetitions of the password plus the remainder to reach klen
    q, r = divmod (klen, len (password))
    return password * q + password [:r], b""
789 SCRYPT_KEY_MEMO = { } # static because needed for both the info file and the archive
792 def kdf_scrypt (params, password, nacl):
794 Wrapper for the Scrypt KDF, corresponds to parameter version one. The
795 computation result is memoized based on the inputs to facilitate spawning
796 multiple encryption contexts.
801 dkLen = params["dkLen"]
804 nacl = os.urandom (params["NaCl_LEN"])
806 key_parms = (password, nacl, N, r, p, dkLen)
807 global SCRYPT_KEY_MEMO
808 if key_parms not in SCRYPT_KEY_MEMO:
809 SCRYPT_KEY_MEMO [key_parms] = \
810 pylibscrypt.scrypt (password, nacl, N, r, p, dkLen)
811 return SCRYPT_KEY_MEMO [key_parms], nacl
814 def kdf_by_version (paramversion=None, defs=None):
816 Pick the KDF handler corresponding to the parameter version or the
819 :rtype: function (password : str, nacl : str) -> str
821 if paramversion is not None:
822 defs = ENCRYPTION_PARAMETERS.get(paramversion, None)
824 raise InvalidParameter ("no encryption parameters for version %r"
826 (kdf, params) = defs["kdf"]
828 if kdf == "scrypt" : fn = kdf_scrypt
829 if kdf == "dummy" : fn = kdf_dummy
831 raise ValueError ("key derivation method %r unknown" % kdf)
832 return partial (fn, params)
835 ###############################################################################
837 ###############################################################################
839 def scrypt_hashsource (pw, ins):
841 Calculate the SCRYPT hash from the password and the information contained
842 in the first header found in ``ins``.
844 This does not validate whether the first object is encrypted correctly.
846 if isinstance (pw, str) is True:
848 elif isinstance (pw, bytes) is False:
849 raise InvalidParameter ("password must be a string, not %s"
851 if isinstance (ins, io.BufferedReader) is False and \
852 isinstance (ins, io.FileIO) is False:
853 raise InvalidParameter ("file to hash must be opened in “binary” mode")
856 hdr = hdr_read_stream (ins)
857 except EndOfFile as exn:
858 noise ("PDT: malformed input: end of file reading first object header")
863 pver = hdr ["paramversion"]
864 if PDTCRYPT_VERBOSE is True:
865 noise ("PDT: salt of first object : %s" % binascii.hexlify (nacl))
866 noise ("PDT: parameter version of archive : %d" % pver)
869 defs = ENCRYPTION_PARAMETERS.get(pver, None)
870 kdfname, params = defs ["kdf"]
871 if kdfname != "scrypt":
872 noise ("PDT: input is not an SCRYPT archive")
875 kdf = kdf_by_version (None, defs)
876 except ValueError as exn:
877 noise ("PDT: object has unknown parameter version %d" % pver)
879 hsh, _void = kdf (pw, nacl)
881 return hsh, nacl, hdr ["version"], pver
884 def scrypt_hashfile (pw, fname):
886 Calculate the SCRYPT hash from the password and the information contained
887 in the first header found in the given file. The header is read only at
890 with deptdcrypt_mk_stream (PDTCRYPT_SOURCE, fname or "-") as ins:
891 hsh, _void, _void, _void = scrypt_hashsource (pw, ins)
895 ###############################################################################
897 ###############################################################################
899 class Crypto (object):
901 Encryption context to remain alive throughout an entire tarfile pass.
906 cnt = None # file counter (uint32_t != 0)
907 iv = None # current IV
908 fixed = None # accu for 64 bit fixed parts of IV
909 used_ivs = None # tracks IVs
910 strict_ivs = False # if True, panic on duplicate object IV
919 info_counter_used = False
920 index_counter_used = False
def __init__ (self, *al, **akv):
    """
    Create the (initially empty) collection of used IVs, then hand all
    arguments on to ``.set_parameters()`` which configures the context.
    """
    # per-instance set; ``.check_duplicate_iv()`` records IVs in it
    self.used_ivs = set ()
    self.set_parameters (*al, **akv)
927 def next_fixed (self):
932 def set_object_counter (self, cnt=None):
934 Safely set the internal counter of encrypted objects. Numerous
937 The same counter may not be reused in combination with one IV fixed
938 part. This is validated elsewhere in the IV handling.
940 Counter zero is invalid. The first two counters are reserved for
941 metadata. The implementation does not allow for splitting metadata
942 files over multiple encrypted objects. (This would be possible by
943 assigning new fixed parts.) Thus in a Deltatar backup there is at most
944 one object with a counter value of one and two. On creation of a
945 context, the initial counter may be chosen. The globals
946 ``AES_GCM_IV_CNT_INFOFILE`` and ``AES_GCM_IV_CNT_INDEX`` can be used to
947 request one of the reserved values. If one of these values has been
948 used, any further attempt of setting the counter to that value will
949 be rejected with an ``InvalidFileCounter`` exception.
951 Out of bounds values (i. e. below one and more than the maximum of 2³²)
952 cause an ``InvalidParameter`` exception to be thrown.
955 self.cnt = AES_GCM_IV_CNT_DATA
957 if cnt == 0 or cnt > AES_GCM_IV_CNT_MAX + 1:
958 raise InvalidParameter ("invalid counter value %d requested: "
959 "acceptable values are from 1 to %d"
960 % (cnt, AES_GCM_IV_CNT_MAX))
961 if cnt == AES_GCM_IV_CNT_INFOFILE:
962 if self.info_counter_used is True:
963 raise InvalidFileCounter ("attempted to reuse info file "
964 "counter %d: must be unique" % cnt)
965 self.info_counter_used = True
966 elif cnt == AES_GCM_IV_CNT_INDEX:
967 if self.index_counter_used is True:
968 raise InvalidFileCounter ("attempted to reuse index file "
969 " counter %d: must be unique" % cnt)
970 self.index_counter_used = True
971 if cnt <= AES_GCM_IV_CNT_MAX:
974 # cnt == AES_GCM_IV_CNT_MAX + 1 → wrap
975 self.cnt = AES_GCM_IV_CNT_DATA
979 def set_parameters (self, password=None, key=None, paramversion=None,
980 nacl=None, counter=None, strict_ivs=False):
982 Configure the internal state of a crypto context. Not intended for
986 self.set_object_counter (counter)
987 self.strict_ivs = strict_ivs
989 if paramversion is not None:
990 self.paramversion = paramversion
993 self.key, self.nacl = key, nacl
996 if password is not None:
997 if isinstance (password, bytes) is False:
998 password = str.encode (password)
999 self.password = password
1000 if paramversion is None and nacl is None:
1001 # postpone key setup until first header is available
1003 kdf = kdf_by_version (paramversion)
1005 self.key, self.nacl = kdf (password, nacl)
1008 def process (self, buf):
1010 Encrypt / decrypt a buffer. Invokes the ``.update()`` method on the
1011 wrapped encryptor or decryptor, respectively.
1013 The Cryptography exception ``AlreadyFinalized`` is translated to an
1014 ``InternalError`` at this point. It may occur in sound code when the GC
1015 closes an encrypting stream after an error. Everywhere else it must be
1018 if self.enc is None:
1019 raise RuntimeError ("process: context not initialized")
1020 self.stats ["in"] += len (buf)
1022 out = self.enc.update (buf)
1023 except cryptography.exceptions.AlreadyFinalized as exn:
1024 raise InternalError (exn)
1025 self.stats ["out"] += len (out)
1029 def next (self, password, paramversion, nacl, iv):
1031 Prepare for encrypting another object: Reset the data counters and
1032 change the configuration in case one of the variable parameters differs
1033 from the last object. Also check the IV for duplicates and error out
1034 if strict checking was requested.
1038 self.stats ["obj"] += 1
1040 self.check_duplicate_iv (iv)
1042 if ( self.paramversion != paramversion
1043 or self.password != password
1044 or self.nacl != nacl):
1045 self.set_parameters (password=password, paramversion=paramversion,
1046 nacl=nacl, strict_ivs=self.strict_ivs)
def check_duplicate_iv (self, iv):
    """
    Record an IV (the 12 byte representation as in the header) in the
    collection of IVs seen by this context.

    With strict checking enabled, an IV that was recorded before causes a
    ``DuplicateIV`` to be thrown. Depending on the context, this may
    indicate a serious error (IV reuse). Without strict checking a repeated
    IV is accepted silently.
    """
    strict = self.strict_ivs is True
    if strict and iv in self.used_ivs:
        raise DuplicateIV ("iv %s was reused" % iv_fmt (iv))
    # the IV is new or duplicates are tolerated: remember it
    self.used_ivs.add (iv)
def counters (self):
    """
    Access the data counters.

    :returns: triple of the ``"obj"``, ``"in"`` and ``"out"`` entries of
              the ``.stats`` dictionary, in that order.
    """
    stats = self.stats
    return tuple (stats [field] for field in ("obj", "in", "out"))
1070 Clear the current context regardless of its finalization state. The
1071 next operation must be ``.next()``.
1076 class Encrypt (Crypto):
1082 def __init__ (self, version, paramversion, password=None, key=None, nacl=None,
1083 counter=AES_GCM_IV_CNT_DATA, strict_ivs=True):
1085 The ctor will throw immediately if one of the parameters does not conform
1086 to our expectations.
1088 counter=AES_GCM_IV_CNT_DATA, strict_ivs=True):
1089 :type version: int to fit uint16_t
1090 :type paramversion: int to fit uint16_t
1091 :param password: mutually exclusive with ``key``
1092 :type password: bytes
1093 :param key: mutually exclusive with ``password``
1096 :type counter: initial object counter the values
1097 ``AES_GCM_IV_CNT_INFOFILE`` and
1098 ``AES_GCM_IV_CNT_INDEX`` are unique in each backup set
1099 and cannot be reused even with different fixed parts.
1100 :type strict_ivs: bool
1102 if password is None and key is None \
1103 or password is not None and key is not None :
1104 raise InvalidParameter ("__init__: need either key or password")
1107 if isinstance (key, bytes) is False:
1108 raise InvalidParameter ("__init__: key must be provided as "
1109 "bytes, not %s" % type (key))
1111 raise InvalidParameter ("__init__: salt must be provided along "
1112 "with encryption key")
1113 else: # password, no key
1114 if isinstance (password, str) is False:
1115 raise InvalidParameter ("__init__: password must be a string, not %s"
1117 if len (password) == 0:
1118 raise InvalidParameter ("__init__: supplied empty password but not "
1119 "permitted for PDT encrypted files")
1121 if isinstance (version, int) is False:
1122 raise InvalidParameter ("__init__: version number must be an "
1123 "integer, not %s" % type (version))
1125 raise InvalidParameter ("__init__: version number must be a "
1126 "nonnegative integer, not %d" % version)
1128 if isinstance (paramversion, int) is False:
1129 raise InvalidParameter ("__init__: crypto parameter version number "
1130 "must be an integer, not %s"
1131 % type (paramversion))
1132 if paramversion < 0:
1133 raise InvalidParameter ("__init__: crypto parameter version number "
1134 "must be a nonnegative integer, not %d"
1137 if nacl is not None:
1138 if isinstance (nacl, bytes) is False:
1139 raise InvalidParameter ("__init__: salt given, but of type %s "
1140 "instead of bytes" % type (nacl))
1141 # salt length would depend on the actual encryption so it can’t be
1142 # validated at this point
1144 self.version = version
1145 self.paramenc = ENCRYPTION_PARAMETERS.get (paramversion) ["enc"]
1147 super().__init__ (password, key, paramversion, nacl, counter=counter,
1148 strict_ivs=strict_ivs)
def next_fixed (self, retries=PDTCRYPT_IV_GEN_MAX_RETRIES):
    """
    Draw a fresh IV fixed part for this encryption context.

    ``PDTCRYPT_IV_FIXEDPART_SIZE`` bytes are requested from ``os.urandom``.
    The value is appended to ``self.fixed`` unless it is already on that
    list, in which case another draw is made. Once ``retries`` draws in a
    row have produced known values, ``IVFixedPartError`` is raised.

    The last element of ``self.fixed`` is the fixed part currently in use.
    """
    attempts = 0
    while attempts < retries:
        candidate = os.urandom (PDTCRYPT_IV_FIXEDPART_SIZE)
        if candidate in self.fixed:
            # collision with an earlier fixed part; count it and draw again
            attempts += 1
            continue
        self.fixed.append (candidate)
        return
    raise IVFixedPartError ("error obtaining a unique IV fixed part from "
                            "/dev/urandom; giving up after %d tries" % attempts)
def iv_make (self):
    """
    Build the IV for the current object: the active fixed part (last entry
    of ``self.fixed``) and the object counter ``self.cnt`` are packed
    according to ``FMT_I2N_IV``.
    """
    active_fixed = self.fixed [-1]
    return struct.pack (FMT_I2N_IV, active_fixed, self.cnt)
def next (self, filename=None, counter=None):
    """
    Set the context up for encrypting the next object.

    :param filename: name of the object. Mandatory for the first object;
                     when omitted later on, the name stored by the previous
                     call is reused. It is fed to ``hdr_make_dummy()`` to
                     obtain the placeholder header.
    :param counter:  optional integer; if given it is installed through
                     ``set_object_counter()`` before the IV is assembled.
    :returns:        the placeholder header, which ``.done()`` later
                     compares against its ``cmpdata`` argument.
    :raises InvalidParameter: on a missing or non-string filename, a
                     non-integer counter, or an unsupported cipher mode.

    After the cipher has been created the object counter is advanced by one
    for the object that follows.
    """
    if filename is None:
        if self.lastinfo is None:
            raise InvalidParameter ("next: filename is mandatory for "
                                    "first object")
        filename, _previous_dummy = self.lastinfo
    elif not isinstance (filename, str):
        raise InvalidParameter ("next: filename must be a string, no %s"
                                % type (filename))

    if counter is not None:
        if not isinstance (counter, int):
            raise InvalidParameter ("next: the supplied counter is of "
                                    "invalid type %s; please pass an "
                                    "integer instead" % type (counter))
        self.set_object_counter (counter)

    self.iv = self.iv_make ()

    cipher_mode = self.paramenc
    if cipher_mode == "aes-gcm":
        cipher = Cipher (algorithms.AES (self.key),
                         modes.GCM (self.iv),
                         backend = default_backend ())
        self.enc = cipher.encryptor ()
    elif cipher_mode == "passthrough":
        self.enc = PassthroughCipher ()
    else:
        raise InvalidParameter ("next: parameter version %d not known"
                                % self.paramversion)

    placeholder = hdr_make_dummy (filename)
    self.lastinfo = (filename, placeholder)
    super().next (self.password, self.paramversion, self.nacl, self.iv)

    self.set_object_counter (self.cnt + 1)
    return placeholder
def done (self, cmpdata):
    """
    Finish encrypting the current object.

    :param cmpdata: bytes found at the position of the header in the output;
                    must equal the placeholder handed out by ``.next()``.
    :returns:       a triple ``(data, hdr, fixed)``: the final chunk of
                    cipher output, the header built from the now complete
                    parameters (IV, ciphertext size, tag), and the list of
                    IV fixed parts of this context.
    :raises InvalidParameter: if *cmpdata* is not a bytes object.
    :raises RuntimeError:     if ``.next()`` was never called or *cmpdata*
                              differs from the placeholder.
    :raises InternalError:    if the header could not be constructed.
    """
    if not isinstance (cmpdata, bytes):
        raise InvalidParameter ("done: comparison input expected as bytes, "
                                "not %s" % type (cmpdata))
    if self.lastinfo is None:
        raise RuntimeError ("done: encryption context not initialized")

    _filename, placeholder = self.lastinfo
    if cmpdata != placeholder:
        raise RuntimeError ("done: bad sync of header for object %d: "
                            "preliminary data does not match; this likely "
                            "indicates a wrongly repositioned stream"
                            % self.cnt)

    tail = self.enc.finalize ()
    tail_len = len (tail)
    self.stats ["out"] += tail_len
    self.ctsize += tail_len

    ok, hdr = hdr_from_params (self.version, self.paramversion, self.nacl,
                               self.iv, self.ctsize, self.enc.tag)
    if ok is False:
        # on failure the second element is formatted into the error instead
        raise InternalError ("error constructing header: %r" % hdr)
    return tail, hdr, self.fixed
def process (self, buf):
    """
    Encrypt a chunk of plaintext with the active encryptor.

    At most as many bytes are taken from *buf* as still fit below
    ``PDTCRYPT_MAX_OBJ_SIZE`` for the current object.

    :returns: a pair ``(consumed, ciphertext)``. ``consumed`` is the number
              of bytes of *buf* that were encrypted and **must** be checked
              by the caller: if it is smaller than ``len (buf)``, the object
              is full and the rest of *buf* has to go into a new object.
    :raises InvalidParameter: if *buf* is not a bytes object.
    """
    if not isinstance (buf, bytes):
        raise InvalidParameter ("process: expected byte buffer, not %s"
                                % type (buf))

    # room left in the current object vs. what the caller handed us
    consumed = min (len (buf), PDTCRYPT_MAX_OBJ_SIZE - self.ptsize)
    self.ptsize += consumed

    ciphertext = super().process (buf [:consumed])
    self.ctsize += len (ciphertext)
    return consumed, ciphertext
class Decrypt (Crypto):
    """
    Decryption context. Takes PDTCRYPT object headers and ciphertext and
    hands back plaintext while checking IV fixed parts, IV succession (in
    strict mode) and the ciphertext length announced by each header.
    """

    tag        = None   # GCM tag, part of header
    last_iv    = None   # check consecutive ivs in strict mode
    hdr_ctsize = -1     # ciphertext size announced by the current header


    def __init__ (self, password=None, key=None, counter=None, fixedparts=None,
                  strict_ivs=False):
        """
        Sanitizing ctor for the decryption context. ``fixedparts`` specifies a
        list of IV fixed parts accepted during decryption. If a fixed part is
        encountered that is not in the list, decryption will fail.

        :param password:    mutually exclusive with ``key``
        :type  password:    str
        :param key:         mutually exclusive with ``password``
        :type  key:         bytes
        :param counter:     initial object counter; the values
                            ``AES_GCM_IV_CNT_INFOFILE`` and
                            ``AES_GCM_IV_CNT_INDEX`` are unique in each
                            backup set and cannot be reused even with
                            different fixed parts.
        :type  fixedparts:  bytes list
        :param strict_ivs:  passed on to the base class; when true,
                            ``check_consecutive_iv()`` rejects counters that
                            do not succeed the previous one.
        :raises InvalidParameter: on missing, conflicting or mistyped
                            arguments.
        """
        # exactly one of the two secrets must be present
        if (password is None) == (key is None):
            raise InvalidParameter ("__init__: need either key or password")

        if key is not None:
            if isinstance (key, bytes) is False:
                raise InvalidParameter ("__init__: key must be provided as "
                                        "bytes, not %s" % type (key))
        else: # password, no key
            if isinstance (password, str) is False:
                raise InvalidParameter ("__init__: password must be a string, not %s"
                                        % type (password))
            if len (password) == 0:
                raise InvalidParameter ("__init__: supplied empty password but not "
                                        "permitted for PDT encrypted files")
        # fixed parts
        if fixedparts is not None:
            if isinstance (fixedparts, list) is False:
                raise InvalidParameter ("__init__: IV fixed parts must be "
                                        "supplied as list, not %s"
                                        % type (fixedparts))
            self.fixed = fixedparts
            # valid_fixed_part() bisects this list, so it has to be ordered
            self.fixed.sort ()

        super().__init__ (password=password, key=key, counter=counter,
                          strict_ivs=strict_ivs)


    def valid_fixed_part (self, iv):
        """
        Check whether the fixed part of *iv* is contained in the (sorted) list
        of accepted fixed parts.
        """
        fixed, _cnt = struct.unpack (FMT_I2N_IV, iv)
        i = bisect.bisect_left (self.fixed, fixed)
        return i != len (self.fixed) and self.fixed [i] == fixed


    def check_consecutive_iv (self, iv):
        """
        Check whether the counter part of the given IV is indeed the successor
        of the currently present counter. This should always be the case for
        the objects in a well formed PDT archive but should not be enforced
        when decrypting out-of-order.

        The check only applies in strict mode and only if the previous object
        had the same fixed part. The IV is remembered for the next call.

        :raises NonConsecutiveIV: if the counter is not the previous one
                                  plus one.
        """
        fixed, cnt = struct.unpack (FMT_I2N_IV, iv)
        if self.strict_ivs is True \
                and self.last_iv is not None \
                and self.last_iv [0] == fixed \
                and self.last_iv [1] != cnt - 1:
            # format the complete IV for display and report the counter that
            # would have been acceptable, i. e. the successor of the last one
            raise NonConsecutiveIV ("iv %s counter not successor of "
                                    "last object (expected %d, found %d)"
                                    % (iv_fmt (iv), self.last_iv [1] + 1, cnt))
        self.last_iv = (fixed, cnt)


    def next (self, hdr):
        """
        Start decrypting the next object. The PDTCRYPT header for the object
        can be given either as already parsed object or as bytes.

        :raises InvalidParameter:   if *hdr* is neither bytes nor a dict.
        :raises InvalidHeader:      if a header field is missing or the
                                    ciphertext size exceeds
                                    ``PDTCRYPT_MAX_OBJ_SIZE``.
        :raises InvalidIVFixedPart: if a list of fixed parts is present and
                                    the IV’s fixed part is not on it.
        :raises NonConsecutiveIV:   see ``check_consecutive_iv()``.
        :raises FormatError:        on an unknown parameter version.
        """
        if isinstance (hdr, bytes) is True:
            hdr = hdr_read (hdr)
        elif isinstance (hdr, dict) is False:
            # this won’t catch malformed specs though
            raise InvalidParameter ("next: wrong type of parameter hdr: "
                                    "expected bytes or spec, got %s"
                                    % type (hdr))
        try:
            paramversion = hdr ["paramversion"]
            nacl         = hdr ["nacl"]
            iv           = hdr ["iv"]
            tag          = hdr ["tag"]
            ctsize       = hdr ["ctsize"]
        except KeyError:
            raise InvalidHeader ("next: not a header %r" % hdr)

        if ctsize > PDTCRYPT_MAX_OBJ_SIZE:
            raise InvalidHeader ("next: ciphertext size %d exceeds maximum "
                                 "object size (%d)"
                                 % (ctsize, PDTCRYPT_MAX_OBJ_SIZE))

        self.hdr_ctsize = ctsize

        super().next (self.password, paramversion, nacl, iv)
        if self.fixed is not None and self.valid_fixed_part (iv) is False:
            raise InvalidIVFixedPart ("iv %s has invalid fixed part"
                                      % iv_fmt (iv))
        self.check_consecutive_iv (iv)

        self.tag = tag
        defs = ENCRYPTION_PARAMETERS.get (paramversion, None)
        if defs is None:
            raise FormatError ("header contains unknown parameter version %d; "
                               "maybe the file was created by a more recent "
                               "version of Deltatar" % paramversion)
        enc = defs ["enc"]
        if enc == "aes-gcm":
            self.enc = Cipher \
                            ( algorithms.AES (self.key)
                            , modes.GCM (iv, tag=self.tag)
                            , backend = default_backend ()) \
                            . decryptor ()
        elif enc == "passthrough":
            self.enc = PassthroughCipher ()
        else:
            raise InternalError ("encryption parameter set %d refers to unknown "
                                 "mode %r" % (paramversion, enc))
        self.set_object_counter (self.cnt + 1)


    def done (self, tag=None):
        """
        Stop decryption of the current object and finalize it with the active
        context. This will throw an *InvalidGCMTag* exception to indicate that
        the authentication tag does not match the data. If the tag is correct,
        the rest of the plaintext is returned.

        :raises InvalidParameter: if *tag* is given but not a bytes object.
        """
        data = b""
        try:
            if tag is None:
                data = self.enc.finalize ()
            else:
                if isinstance (tag, bytes) is False:
                    raise InvalidParameter ("done: wrong type of parameter "
                                            "tag: expected bytes, got %s"
                                            % type (tag))
                # NOTE(review): the tag taken from the header is passed on
                # here, not the ``tag`` argument -- confirm this is intended
                data = self.enc.finalize_with_tag (self.tag)
        except cryptography.exceptions.InvalidTag:
            raise InvalidGCMTag ("done: tag mismatch of object %d: %s "
                                  "rejected by finalize ()"
                                  % (self.cnt, binascii.hexlify (self.tag)))
        self.ptsize += len (data)
        self.stats ["out"] += len (data)

        # sizes of input, output and header announcement must agree
        assert self.ctsize == self.ptsize == self.hdr_ctsize

        return data


    def process (self, buf):
        """
        Decrypt the bytes object *buf* with the active decryptor.

        :returns: the plaintext produced for this chunk.
        :raises InvalidParameter:  if *buf* is not a bytes object.
        :raises CiphertextTooLong: if more ciphertext was supplied for the
                                   object than its header announced.
        """
        if isinstance (buf, bytes) is False:
            raise InvalidParameter ("process: expected byte buffer, not %s"
                                    % type (buf))
        self.ctsize += len (buf)
        if self.ctsize > self.hdr_ctsize:
            raise CiphertextTooLong ("process: object length exceeded: got "
                                     "%d B but header specfiies %d B"
                                     % (self.ctsize, self.hdr_ctsize))

        data = super().process (buf)
        self.ptsize += len (data)
        return data
1476 ###############################################################################
1478 ###############################################################################
1480 def _patch_global (glob, vow, n=None):
1482 Adapt upper file counter bound for testing IV logic. Completely unsafe.
1484 assert vow == "I am fully aware that this will void my warranty."
1485 r = globals () [glob]
1487 n = globals () [glob + "_DEFAULT"]
1488 globals () [glob] = n
# Test hooks built on _patch_global() with the name of the global already
# bound; the remaining arguments are the vow and, optionally, the new value.
# Each call returns the value that was in place before.
_testing_set_AES_GCM_IV_CNT_MAX = \
        partial (_patch_global, "AES_GCM_IV_CNT_MAX")

_testing_set_PDTCRYPT_MAX_OBJ_SIZE = \
        partial (_patch_global, "PDTCRYPT_MAX_OBJ_SIZE")
def open2_dump_file (fname, dir_fd, force=False):
    """
    Create the file *fname* relative to the directory descriptor *dir_fd*
    for writing and return its raw file descriptor.

    The file is requested with owner read / write permissions. If *force*
    is true an existing file gets truncated; otherwise the file is opened
    with ``O_EXCL`` so that an existing file makes the call fail.

    :raises RuntimeError: if the file exists and *force* is not set.
    """
    flags = os.O_CREAT | os.O_WRONLY
    flags |= os.O_TRUNC if force is True else os.O_EXCL
    perms = stat.S_IRUSR | stat.S_IWUSR

    try:
        fd = os.open (fname, flags, perms, dir_fd=dir_fd)
    except FileExistsError:
        noise ("PDT: refusing to overwrite existing file %s" % fname)
        noise ("")
        raise RuntimeError ("destination file %s already exists" % fname)

    if PDTCRYPT_VERBOSE is True:
        noise ("PDT: new output file %s (fd=%d)" % (fname, fd))
    return fd
1518 ###############################################################################
1519 ## freestanding invocation
1520 ###############################################################################
# Numeric ids of the subcommands understood by the command line interface.
PDTCRYPT_SUB_PROCESS = 0
PDTCRYPT_SUB_SCRYPT  = 1
PDTCRYPT_SUB_SCAN    = 2

# Lookup table from the subcommand word given on the command line to its id;
# parse_argv() indexes it with the raw argument.
PDTCRYPT_SUB = \
        { "process" : PDTCRYPT_SUB_PROCESS
        , "scrypt"  : PDTCRYPT_SUB_SCRYPT
        , "scan"    : PDTCRYPT_SUB_SCAN }
# Bit flags making up the ``mode`` value handled by the “process” subcommand.
PDTCRYPT_DECRYPT   = 1 << 0 # decrypt archive with password
PDTCRYPT_SPLIT     = 1 << 1 # split archive into individual objects
PDTCRYPT_HASH      = 1 << 2 # output scrypt hash for file and given password

# File name templates for split output and for objects dumped by the scan
# subcommand; both are filled in with the number of the object.
PDTCRYPT_SPLITNAME  = "pdtcrypt-object-%d.bin"
PDTCRYPT_RESCUENAME = "pdtcrypt-rescue-object-%0.5d.bin"

# Switches toggled by parse_argv() according to the command line.
PDTCRYPT_VERBOSE   = False
PDTCRYPT_STRICTIVS = False
PDTCRYPT_OVERWRITE = False
# upper bound for the size of a single read while decrypting
PDTCRYPT_BLOCKSIZE = 1 << 12

# Format and parameter version assumed by mode_scrypt() unless the values are
# taken from an input file.
PDTCRYPT_DEFAULT_VER  = 1
PDTCRYPT_DEFAULT_PVER = 1

# scrypt hashing output control
PDTCRYPT_SCRYPT_INTRANATOR = 0
PDTCRYPT_SCRYPT_PARAMETERS = 1
PDTCRYPT_SCRYPT_DEFAULT    = PDTCRYPT_SCRYPT_INTRANATOR

# names accepted as argument to the format option of the scrypt subcommand
PDTCRYPT_SCRYPT_FORMAT = \
    { "i2n"    : PDTCRYPT_SCRYPT_INTRANATOR
    , "params" : PDTCRYPT_SCRYPT_PARAMETERS }

PDTCRYPT_TT_COLUMNS = 80 # assume standard terminal
class PDTDecryptionError (Exception):
    """Decryption failed; raised by the command line decryption code, e. g.
    on a bad header or when the byte counts do not add up."""
class PDTSplitError (Exception):
    """Splitting failed: the output file for an object could not be
    created."""
def noise (*a, **b):
    """Like ``print()`` but writing to stderr; arguments are passed through."""
    print (*a, file=sys.stderr, **b)
class PassthroughDecryptor (object):
    """
    Stand-in for ``Decrypt`` that leaves the data untouched. The header of
    each object is re-serialized and emitted in front of the object’s data.
    """

    curhdr = None # write current header on first data write

    def __init__ (self):
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT: no encryption; data passthrough")

    def next (self, hdr):
        """Serialize *hdr* and keep it until the first data is written."""
        ok, serialized = hdr_make (hdr)
        if ok is False:
            raise PDTDecryptionError ("bad header %r" % hdr)
        self.curhdr = serialized

    def done (self):
        """Return the header if it is still pending, else an empty buffer."""
        pending = self.curhdr
        return b"" if pending is None else pending

    def process (self, d):
        """Return *d*, preceded by the pending header on first use."""
        pending = self.curhdr
        if pending is None:
            return d
        self.curhdr = None
        return pending + d
1597 def depdtcrypt (mode, secret, ins, outs):
1599 Remove PDTCRYPT layer from all objects encrypted with the secret. Used on a
1600 Deltatar backup this will yield a (possibly Gzip compressed) tarball.
1602 ctleft = -1 # length of ciphertext to consume
1603 ctcurrent = 0 # total ciphertext of current object
1604 total_obj = 0 # total number of objects read
1605 total_pt = 0 # total plaintext bytes
1606 total_ct = 0 # total ciphertext bytes
1607 total_read = 0 # total bytes read
1608 outfile = None # Python file object for output
1610 if mode & PDTCRYPT_DECRYPT: # decryptor
1612 if ks == PDTCRYPT_SECRET_PW:
1613 decr = Decrypt (password=secret [1], strict_ivs=PDTCRYPT_STRICTIVS)
1614 elif ks == PDTCRYPT_SECRET_KEY:
1616 decr = Decrypt (key=key, strict_ivs=PDTCRYPT_STRICTIVS)
1618 raise InternalError ("‘%d’ does not specify a valid kind of secret"
1621 decr = PassthroughDecryptor ()
1624 """Dummy for non-split mode: output file does not vary."""
1627 if mode & PDTCRYPT_SPLIT:
1628 def nextout (outfile):
1630 We were passed an fd as outs for accessing the destination
1631 directory where extracted archive components are supposed
1636 if PDTCRYPT_VERBOSE is True:
1637 noise ("PDT: no output file to close at this point")
1639 if PDTCRYPT_VERBOSE is True:
1640 noise ("PDT: release output file %r" % outfile)
1641 # cleanup happens automatically by the GC; the next
1642 # line will error out on account of an invalid fd
1645 assert total_obj > 0
1646 fname = PDTCRYPT_SPLITNAME % total_obj
1648 outfd = open2_dump_file (fname, outs, force=PDTCRYPT_OVERWRITE)
1649 except RuntimeError as exn:
1650 raise PDTSplitError (exn)
1651 return os.fdopen (outfd, "wb", closefd=True)
1655 """ESPIPE is normal on non-seekable stdio stream."""
1658 except OSError as exn:
1659 if exn.errno == errno.ESPIPE:
1662 def out (pt, outfile):
1666 if PDTCRYPT_VERBOSE is True:
1667 noise ("PDT:\t· decrypt plaintext %d B" % (npt))
1669 nn = outfile.write (pt)
1670 except OSError as exn: # probably ENOSPC
1671 raise DecryptionError ("error (%s)" % exn)
1673 raise DecryptionError ("write aborted after %d of %d B" % (nn, npt))
1677 # current object completed; in a valid archive this marks either
1678 # the start of a new header or the end of the input
1679 if ctleft == 0: # current object requires finalization
1680 if PDTCRYPT_VERBOSE is True:
1681 noise ("PDT: %d finalize" % tell (ins))
1684 except InvalidGCMTag as exn:
1685 raise DecryptionError ("error finalizing object %d (%d B): "
1686 "%r" % (total_obj, len (pt), exn)) \
1689 if PDTCRYPT_VERBOSE is True:
1690 noise ("PDT:\t· object validated")
1692 if PDTCRYPT_VERBOSE is True:
1693 noise ("PDT: %d hdr" % tell (ins))
1695 hdr = hdr_read_stream (ins)
1696 total_read += PDTCRYPT_HDR_SIZE
1697 except EndOfFile as exn:
1698 total_read += exn.remainder
1699 if total_ct + total_obj * PDTCRYPT_HDR_SIZE != total_read:
1700 raise PDTDecryptionError ("ciphertext processed (%d B) plus "
1701 "overhead (%d × %d B) does not match "
1702 "the number of bytes read (%d )"
1703 % (total_ct, total_obj, PDTCRYPT_HDR_SIZE,
1705 # the single good exit
1706 return total_read, total_obj, total_ct, total_pt
1707 except InvalidHeader as exn:
1708 raise PDTDecryptionError ("invalid header at position %d in %r "
1709 "(%s)" % (tell (ins), exn, ins))
1710 if PDTCRYPT_VERBOSE is True:
1711 pretty = hdr_fmt_pretty (hdr)
1712 noise (reduce (lambda a, e: (a + "\n" if a else "") + "PDT:\t· " + e,
1713 pretty.splitlines (), ""))
1714 ctcurrent = ctleft = hdr ["ctsize"]
1718 total_obj += 1 # used in file counter with split mode
1720 # finalization complete or skipped in case of first object in
1721 # stream; create a new output file if necessary
1722 outfile = nextout (outfile)
1724 if PDTCRYPT_VERBOSE is True:
1725 noise ("PDT: %d decrypt obj no. %d, %d B"
1726 % (tell (ins), total_obj, ctleft))
1728 # always allocate a new buffer since python-cryptography doesn’t allow
1729 # passing a bytearray :/
1730 nexpect = min (ctleft, PDTCRYPT_BLOCKSIZE)
1731 if PDTCRYPT_VERBOSE is True:
1732 noise ("PDT:\t· [%d] %d%% done, read block (%d B of %d B remaining)"
1734 100 - ctleft * 100 / (ctcurrent > 0 and ctcurrent or 1),
1736 ct = ins.read (nexpect)
1740 raise EndOfFile (nct,
1741 "hit EOF after %d of %d B in block [%d:%d); "
1742 "%d B ciphertext remaining for object no %d"
1743 % (nct, nexpect, off, off + nexpect, ctleft,
1749 if PDTCRYPT_VERBOSE is True:
1750 noise ("PDT:\t· decrypt ciphertext %d B" % (nct))
1751 pt = decr.process (ct)
def deptdcrypt_mk_stream (kind, path):
    """
    Return a binary stream for *path*; the path ``-`` selects stdout for a
    sink and stdin for a source, anything else is opened as a file.

    :raises ValueError: if *kind* is neither sink nor source.
    """
    if kind == PDTCRYPT_SINK:
        if path == "-":
            if PDTCRYPT_VERBOSE is True:
                noise ("PDT: sink: stdout")
            return sys.stdout.buffer
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT: sink: file %s" % path)
        return io.FileIO (path, "w")

    if kind == PDTCRYPT_SOURCE:
        if path == "-":
            if PDTCRYPT_VERBOSE is True:
                noise ("PDT: source: stdin")
            return sys.stdin.buffer
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT: source: file %s" % path)
        return io.FileIO (path, "r")

    raise ValueError ("bogus stream “%s” / %s" % (kind, path))
1775 def mode_depdtcrypt (mode, secret, ins, outs):
1777 total_read, total_obj, total_ct, total_pt = \
1778 depdtcrypt (mode, secret, ins, outs)
1779 except DecryptionError as exn:
1780 noise ("PDT: Decryption failed:")
1782 noise ("PDT: “%s”" % exn)
1784 noise ("PDT: Did you specify the correct key / password?")
1787 except PDTSplitError as exn:
1788 noise ("PDT: Split operation failed:")
1790 noise ("PDT: “%s”" % exn)
1792 noise ("PDT: Hint: target directory should be empty.")
1796 if PDTCRYPT_VERBOSE is True:
1797 noise ("PDT: decryption successful" )
1798 noise ("PDT: %.10d bytes read" % total_read)
1799 noise ("PDT: %.10d objects decrypted" % total_obj )
1800 noise ("PDT: %.10d bytes ciphertext" % total_ct )
1801 noise ("PDT: %.10d bytes plaintext" % total_pt )
def mode_scrypt (pw, ins=None, nacl=None, fmt=PDTCRYPT_SCRYPT_INTRANATOR):
    """
    Print key and salt for the password *pw* as a JSON object on stdout.

    If *ins* is given, hash, salt and versions are obtained from
    ``scrypt_hashsource()``. Otherwise *nacl* is decoded from hexadecimal
    and the key is derived with the KDF of the default parameter version.

    :param fmt: ``PDTCRYPT_SCRYPT_INTRANATOR`` emits base64 values plus the
                parameter version; ``PDTCRYPT_SCRYPT_PARAMETERS`` emits hex
                values, the format version and the scrypt parameters.
    :raises RuntimeError: for any other value of *fmt*.
    """
    hsh = None
    paramversion = PDTCRYPT_DEFAULT_PVER
    if ins is None:
        nacl = binascii.unhexlify (nacl)
        version = PDTCRYPT_DEFAULT_VER
    else:
        hsh, nacl, version, paramversion = scrypt_hashsource (pw, ins)
    defs = ENCRYPTION_PARAMETERS.get (paramversion, None)

    _kdfname, params = defs ["kdf"]
    if hsh is None:
        # no input file, so the key still needs to be derived
        derive = kdf_by_version (None, defs)
        hsh, _void = derive (pw, nacl)

    import json

    if fmt == PDTCRYPT_SCRYPT_INTRANATOR:
        record = { "salt"         : base64.b64encode (nacl).decode ()
                 , "key"          : base64.b64encode (hsh) .decode ()
                 , "paramversion" : paramversion }
    elif fmt == PDTCRYPT_SCRYPT_PARAMETERS:
        record = { "salt"          : binascii.hexlify (nacl).decode ()
                 , "key"           : binascii.hexlify (hsh) .decode ()
                 , "version"       : version
                 , "scrypt_params" : { name : params [name]
                                       for name in ("N", "r", "p", "dkLen") } }
    else:
        raise RuntimeError ("bad scrypt output scheme %r" % fmt)

    print (json.dumps (record))
1843 def noise_output_candidates (cands, indent=8, cols=PDTCRYPT_TT_COLUMNS):
1845 Print a list of offsets without garbling the terminal too much.
1847 The indent is counted from column zero; if it is wide enough, the “PDT: ”
1848 marker will be prepended, considered part of the indentation.
1852 idt = " " * indent if indent < 5 else "PDT: " + " " * (indent - 5)
1857 init = True # prevent leading separator
1860 raise ValueError ("the requested indentation exceeds the line "
1861 "width by %d" % (indent - wd))
1871 if lpos > wd: # line break
# Tags for the two kinds of interval boundaries. They take part in sorting
# the boundaries: since the end tag is the smaller one, an interval ending at
# some offset is closed before another one starting at that same offset is
# opened, so slices that merely touch are not considered overlapping.
SLICE_START = 1
SLICE_END   = 0

def find_overlaps (slices):
    """
    Return those elements of *slices* that overlap with at least one other
    element. Each slice is a pair of start and end offset.

    The boundaries of all slices are visited in sorted order while keeping
    track of the slices currently open. Whenever a slice is closed while
    others are open too, all of them are recorded as overlapping.
    """
    events = []
    for idx, span in enumerate (slices):
        events += [ (span [0], SLICE_START, idx)
                  , (span [1], SLICE_END  , idx) ]
    events.sort ()

    open_now    = set () # indices of slices entered but not yet left
    overlapping = set () # indices of slices found to overlap
    for _offset, kind, idx in events:
        if kind == SLICE_START:
            open_now.add (idx)
            continue
        if len (open_now) > 1: # closing one that overlapped
            overlapping.update (open_now)
        open_now.remove (idx)

    return [ slices [idx] for idx in overlapping ]
1916 def mode_scan (secret, fname, outs=None, nacl=None):
1918 Dissect a binary file, looking for PDTCRYPT headers and objects.
1920 If *outs* is supplied, recoverable data will be dumped into the specified
1924 ifd = os.open (fname, os.O_RDONLY)
1925 except FileNotFoundError:
1926 noise ("PDT: failed to open %s readonly" % fname)
1931 if PDTCRYPT_VERBOSE is True:
1932 noise ("PDT: scan for potential sync points")
1933 cands = locate_hdr_candidates (ifd)
1934 if len (cands) == 0:
1935 noise ("PDT: scan complete: input does not contain potential PDT "
1936 "headers; giving up.")
1938 if PDTCRYPT_VERBOSE is True:
1939 noise ("PDT: scan complete: found %d candidates:" % len (cands))
1940 noise_output_candidates (cands)
1945 junk, todo, slices = [], [], []
1950 vdt, hdr = inspect_hdr (ifd, cand)
1952 vdts = verdict_fmt (vdt)
1954 if vdt == HDR_CAND_JUNK:
1955 noise ("PDT: obj %d: %s object: bad header, skipping" % vdts)
1958 off0 = cand + PDTCRYPT_HDR_SIZE
1959 if PDTCRYPT_VERBOSE is True:
1960 noise ("PDT: obj %d: read payload @%d" % (nobj, off0))
1961 pretty = hdr_fmt_pretty (hdr)
1962 noise (reduce (lambda a, e: (a + "\n" if a else "") + "PDT:\t· " + e,
1963 pretty.splitlines (), ""))
1966 if outs is not None:
1967 ofname = PDTCRYPT_RESCUENAME % nobj
1968 ofd = open2_dump_file (ofname, outs, force=PDTCRYPT_OVERWRITE)
1970 ctsize = hdr ["ctsize"]
1972 l = try_decrypt (ifd, off0, hdr, secret, ofd=ofd)
1974 slices.append ((off0, off0 + l))
1978 if vdt == HDR_CAND_GOOD and ok is True:
1979 noise ("PDT: %d → ✓ %s object %d–%d"
1980 % (cand, vdts, off0, off0 + ctsize))
1981 elif vdt == HDR_CAND_FISHY and ok is True:
1982 noise ("PDT: %d → × %s object %d–%d, corrupt header"
1983 % (cand, vdts, off0, off0 + ctsize))
1984 elif vdt == HDR_CAND_GOOD and ok is False:
1985 noise ("PDT: %d → × %s object %d–%d, problematic payload"
1986 % (cand, vdts, off0, off0 + ctsize))
1987 elif vdt == HDR_CAND_FISHY and ok is False:
1988 noise ("PDT: %d → × %s object %d–%d, corrupt header, problematic "
1989 "ciphertext" % (cand, vdts, off0, off0 + ctsize))
1996 noise ("PDT: all headers ok")
1998 noise ("PDT: %d candidates not parseable as headers:" % len (junk))
1999 noise_output_candidates (junk)
2001 overlap = find_overlaps (slices)
2002 if len (overlap) > 0:
2003 noise ("PDT: %d objects overlapping others" % len (overlap))
2004 for slice in overlap:
2005 noise ("PDT: × %d→%d" % (slice [0], slice [1]))
2007 def usage (err=False):
2011 indent = ' ' * len (SELF)
2012 out ("usage: %s SUBCOMMAND { --help" % SELF)
2013 out (" %s | [ -v ] { -p PASSWORD | -k KEY }" % indent)
2014 out (" %s [ { -i | --in } { - | SOURCE } ]" % indent)
2015 out (" %s [ { -n | --nacl } { SALT } ]" % indent)
2016 out (" %s [ { -o | --out } { - | DESTINATION } ]" % indent)
2017 out (" %s [ -D | --no-decrypt ] [ -S | --split ]" % indent)
2018 out (" %s [ -f | --format ]" % indent)
2021 out ("\t\tSUBCOMMAND main mode: { process | scrypt }")
2023 out ("\t\t process: extract objects from PDT archive")
2024 out ("\t\t scrypt: calculate hash from password and first object")
2025 out ("\t\t-p PASSWORD password to derive the encryption key from")
2026 out ("\t\t-k KEY encryption key as 16 bytes in hexadecimal notation")
2027 out ("\t\t-s enforce strict handling of initialization vectors")
2028 out ("\t\t-i SOURCE file name to read from")
2029 out ("\t\t-o DESTINATION file to write output to")
2030 out ("\t\t-n SALT provide salt for scrypt mode in hex encoding")
2031 out ("\t\t-v print extra info")
2032 out ("\t\t-S split into files at object boundaries; this")
2033 out ("\t\t requires DESTINATION to refer to directory")
2034 out ("\t\t-D PDT header and ciphertext passthrough")
2035 out ("\t\t-f format of SCRYPT hash output (“default” or “parameters”)")
2037 out ("\tinstead of filenames, “-” may used to specify stdin / stdout")
2039 sys.exit ((err is True) and 42 or 0)
2049 def parse_argv (argv):
2050 global PDTCRYPT_OVERWRITE
2052 mode = PDTCRYPT_DECRYPT
2058 scrypt_format = PDTCRYPT_SCRYPT_DEFAULT
2061 SELF = os.path.basename (next (argvi))
2064 rawsubcmd = next (argvi)
2065 subcommand = PDTCRYPT_SUB [rawsubcmd]
2066 except StopIteration:
2067 bail ("ERROR: subcommand required")
2069 bail ("ERROR: invalid subcommand “%s” specified" % rawsubcmd)
2075 except StopIteration:
2076 bail ("ERROR: argument list incomplete")
2078 def checked_secret (s):
2083 bail ("ERROR: encountered “%s” but secret already given" % arg)
2086 if arg in [ "-h", "--help" ]:
2089 elif arg in [ "-v", "--verbose", "--wtf" ]:
2090 global PDTCRYPT_VERBOSE
2091 PDTCRYPT_VERBOSE = True
2092 elif arg in [ "-i", "--in", "--source" ]:
2093 insspec = checked_arg ()
2094 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypt from %s" % insspec)
2095 elif arg in [ "-p", "--password" ]:
2096 arg = checked_arg ()
2097 checked_secret (make_secret (password=arg))
2098 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypting with password")
2100 if subcommand == PDTCRYPT_SUB_PROCESS:
2101 if arg in [ "-s", "--strict-ivs" ]:
2102 global PDTCRYPT_STRICTIVS
2103 PDTCRYPT_STRICTIVS = True
2104 elif arg in [ "-o", "--out", "--dest", "--sink" ]:
2105 outsspec = checked_arg ()
2106 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypt to %s" % outsspec)
2107 elif arg in [ "-f", "--force" ]:
2108 PDTCRYPT_OVERWRITE = True
2109 if PDTCRYPT_VERBOSE is True: noise ("PDT: overwrite existing files")
2110 elif arg in [ "-S", "--split" ]:
2111 mode |= PDTCRYPT_SPLIT
2112 if PDTCRYPT_VERBOSE is True: noise ("PDT: split files")
2113 elif arg in [ "-D", "--no-decrypt" ]:
2114 mode &= ~PDTCRYPT_DECRYPT
2115 if PDTCRYPT_VERBOSE is True: noise ("PDT: not decrypting")
2116 elif arg in [ "-k", "--key" ]:
2117 arg = checked_arg ()
2118 checked_secret (make_secret (key=arg))
2119 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypting with key")
2121 bail ("ERROR: unexpected positional argument “%s”" % arg)
2122 elif subcommand == PDTCRYPT_SUB_SCRYPT:
2123 if arg in [ "-n", "--nacl", "--salt" ]:
2124 nacl = checked_arg ()
2125 if PDTCRYPT_VERBOSE is True: noise ("PDT: salt key with %s" % nacl)
2126 elif arg in [ "-f", "--format" ]:
2127 arg = checked_arg ()
2129 scrypt_format = PDTCRYPT_SCRYPT_FORMAT [arg]
2131 bail ("ERROR: invalid scrypt output format %s" % arg)
2132 if PDTCRYPT_VERBOSE is True:
2133 noise ("PDT: scrypt output format “%s”" % scrypt_format)
2135 bail ("ERROR: unexpected positional argument “%s”" % arg)
2136 elif subcommand == PDTCRYPT_SUB_SCAN:
2137 if arg in [ "-o", "--out", "--dest", "--sink" ]:
2138 outsspec = checked_arg ()
2139 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypt to %s" % outsspec)
2140 elif arg in [ "-f", "--force" ]:
2141 PDTCRYPT_OVERWRITE = True
2142 if PDTCRYPT_VERBOSE is True: noise ("PDT: overwrite existing files")
2144 bail ("ERROR: unexpected positional argument “%s”" % arg)
2147 if PDTCRYPT_VERBOSE is True:
2148 noise ("ERROR: no password or key specified, trying $PDTCRYPT_PASSWORD")
2149 epw = os.getenv ("PDTCRYPT_PASSWORD")
2151 checked_secret (make_secret (password=epw.strip ()))
2154 if PDTCRYPT_VERBOSE is True:
2155 noise ("ERROR: no password or key specified, trying $PDTCRYPT_KEY")
2156 ek = os.getenv ("PDTCRYPT_KEY")
2158 checked_secret (make_secret (key=ek.strip ()))
2161 if subcommand == PDTCRYPT_SUB_SCRYPT:
2162 bail ("ERROR: scrypt hash mode requested but no password given")
2163 elif mode & PDTCRYPT_DECRYPT:
2164 bail ("ERROR: decryption requested but no password given")
2166 if mode & PDTCRYPT_SPLIT and outsspec is None:
2167 bail ("ERROR: split mode is incompatible with stdout sink "
2170 if subcommand == PDTCRYPT_SUB_SCAN and outsspec is None:
2171 pass # no output by default in scan mode
2172 elif mode & PDTCRYPT_SPLIT or subcommand == PDTCRYPT_SUB_SCAN:
2173 # destination must be directory
2175 bail ("ERROR: mode is incompatible with stdout sink")
2178 os.makedirs (outsspec, 0o700)
2179 except FileExistsError:
2180 # if it’s a directory with appropriate perms, everything is
2181 # good; otherwise, below invocation of open(2) will fail
2183 outs = os.open (outsspec, os.O_DIRECTORY, 0o600)
2184 except FileNotFoundError as exn:
2185 bail ("ERROR: cannot create target directory “%s”" % outsspec)
2186 except NotADirectoryError as exn:
2187 bail ("ERROR: target path “%s” is not a directory" % outsspec)
2189 outs = deptdcrypt_mk_stream (PDTCRYPT_SINK, outsspec or "-")
2191 if subcommand == PDTCRYPT_SUB_SCAN:
2193 bail ("ERROR: please supply an input file for scanning")
2195 bail ("ERROR: input must be seekable; please specify a file")
2196 return True, partial (mode_scan, secret, insspec, outs, nacl=nacl)
2198 if subcommand == PDTCRYPT_SUB_SCRYPT:
2199 if secret [0] == PDTCRYPT_SECRET_KEY:
2200 bail ("ERROR: scrypt mode requires a password")
2201 if insspec is not None and nacl is not None \
2202 or insspec is None and nacl is None :
2203 bail ("ERROR: please supply either an input file or "
2208 if insspec is not None or subcommand != PDTCRYPT_SUB_SCRYPT:
2209 ins = deptdcrypt_mk_stream (PDTCRYPT_SOURCE, insspec or "-")
2211 if subcommand == PDTCRYPT_SUB_SCRYPT:
2212 return True, partial (mode_scrypt, secret [1].encode (), ins, nacl,
2215 return True, partial (mode_depdtcrypt, mode, secret, ins, outs)
2219 ok, runner = parse_argv (argv)
2221 if ok is True: return runner ()
# Freestanding invocation: run the command line interface and use the value
# returned by main() as the exit status of the process.
if __name__ == "__main__":
    sys.exit (main (sys.argv))