6 ===============================================================================
7 crypto -- Encryption Layer for the Deltatar Backup
8 ===============================================================================
12 - AES-GCM for the symmetric encryption;
17 - NIST Recommendation for Block Cipher Modes of Operation: Galois/Counter
19 http://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf
22 https://cryptome.org/2014/01/aes-gcm-v1.pdf
24 - Authentication weaknesses in GCM
25 http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/comments/CWC-GCM/Ferguson2.pdf
28 -------------------------------------------------------------------------------
30 Errors fall into roughly three categories:
32 - Cryptographical errors or invalid data.
34 - ``InvalidGCMTag`` (decryption failed on account of an invalid GCM
36 - ``InvalidIVFixedPart`` (IV fixed part of object not found in list),
37 - ``DuplicateIV`` (the IV of an encrypted object already occurred),
38 - ``DecryptionError`` (used in CLI decryption for presenting error
39 conditions to the user).
41 - Incorrect usage of the library.
43 - ``InvalidParameter`` (non-conforming user supplied parameter),
44 - ``InvalidHeader`` (data passed for reading not parsable into header),
45 - ``FormatError`` (cannot handle header or parameter version),
48 - Bad internal state. If one of these is encountered it means that a state
49 was reached that shouldn’t occur during normal processing.
54 Also, ``EndOfFile`` is used as a sentinel to communicate that a stream supplied
55 for reading is exhausted.
57 Initialization Vectors
58 -------------------------------------------------------------------------------
60 Initialization vectors are checked for reuse during the lifetime of a decryptor.
61 The fixed counters for metadata files cannot be reused and attempts to do so
62 will cause a DuplicateIV error. This means the length of objects encrypted with
63 a metadata counter is capped at 63 GB.
65 For ordinary, non-metadata payload, there is an optional mode with strict IV
66 checking that causes a crypto context to fail if an IV encountered or created
67 was already used for decrypting or encrypting, respectively, an earlier object.
68 Note that this mode can trigger false positives when decrypting non-linearly,
69 e. g. when traversing the same object multiple times. Since the crypto context
70 has no notion of a position in a PDT encrypted archive, this condition must be
71 sorted out downstream.
74 -------------------------------------------------------------------------------
76 ``crypto.py`` may be invoked as a script for decrypting, validating, and
77 splitting PDT encrypted files. Consult the usage message for details.
81 Decrypt from stdin using the password ‘foo’: ::
83 $ crypto.py process foo -i - -o - <some-file.tar.gz.pdtcrypt >some-file.tar.gz
85 Output verbose information about the encrypted objects in the archive: ::
87 $ crypto.py process foo -v -i some-file.tar.gz.pdtcrypt -o /dev/null
88 PDT: decrypt from some-file.tar.gz.pdtcrypt
89 PDT: decrypt to /dev/null
90 PDT: source: file some-file.tar.gz.pdtcrypt
91 PDT: sink: file /dev/null
93 PDT: · version = 1 : 0100
94 PDT: · paramversion = 1 : 0100
95 PDT: · nacl : d270 b031 00d1 87e2 c946 610d 7b7f 7e5f
96 PDT: · iv : 02ee 3dd7 a963 1eb1 0100 0000
97 PDT: · ctsize = 591 : 4f02 0000 0000 0000
98 PDT: · tag : 5b2d 6d8b 8f82 4842 12fd 0b10 b6e3 369b
99 PDT: 64 decrypt obj no. 1, 591 B
100 PDT: · [64] 0% done, read block (591 B of 591 B remaining)
101 PDT: · decrypt ciphertext 591 B
102 PDT: · decrypt plaintext 591 B
106 Also, the mode *scrypt* allows deriving encryption keys. To calculate the
107 encryption key from the password ‘foo’ and the salt of the first object in a
108 PDT encrypted file: ::
110 $ crypto.py scrypt foo -i some-file.pdtcrypt
111 {"paramversion": 1, "salt": "Cqzbk48e3peEjzWto8D0yA==", "key": "JH9EkMwaM4x9F5aim5gK/Q=="}
113 The computed 16 byte key is given in hexadecimal notation in the value to
114 ``hash`` and can be fed into Python’s ``binascii.unhexlify()`` to obtain the
115 corresponding binary representation.
117 Note that in Scrypt hashing mode, no data integrity checks are being performed.
118 If the wrong password is given, a wrong key will be derived. Whether the password
119 was indeed correct can only be determined by decrypting. Note that since PDT
120 archives essentially consist of a stream of independent objects, the salt and
121 other parameters may change. Thus a key derived using above method from the
122 first object doesn’t necessarily apply to any of the subsequent objects.
131 from functools import reduce, partial
142 except ImportError as exn:
145 if __name__ == "__main__": ## Work around the import mechanism lest Python’s
146 pwd = os.getcwd() ## preference for local imports causes a cyclical
147 ## import (crypto → pylibscrypt → […] → ./tarfile → crypto).
148 sys.path = [ p for p in sys.path if p.find ("deltatar") < 0 ]
151 from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
152 from cryptography.hazmat.backends import default_backend
156 __all__ = [ "hdr_make", "hdr_read", "hdr_fmt", "hdr_fmt_pretty"
158 , "PDTCRYPT_HDR_SIZE", "AES_GCM_IV_CNT_DATA"
159 , "AES_GCM_IV_CNT_INFOFILE", "AES_GCM_IV_CNT_INDEX"
163 ###############################################################################
165 ###############################################################################
167 class EndOfFile (Exception):
171 def __init__ (self, n=None, msg=None):
177 class InvalidParameter (Exception):
178 """Inputs not valid for PDT encryption."""
182 class InvalidHeader (Exception):
183 """Header not valid."""
187 class InvalidGCMTag (Exception):
189 The GCM tag calculated during decryption differs from that in the object
195 class InvalidIVFixedPart (Exception):
197 IV fixed part not in supplied list: either the backup is corrupt or the
198 current object does not belong to it.
203 class IVFixedPartError (Exception):
205 Error creating a unique IV fixed part: repeated calls to system RNG yielded
206 the same sequence of bytes as the last IV used.
211 class InvalidFileCounter (Exception):
213 When encrypting, an attempted reuse of a dedicated counter (info file,
214 index file) was caught.
219 class DuplicateIV (Exception):
221 During encryption, the current IV fixed part is identical to an already
222 existing IV (same prefix and file counter). This indicates tampering or
223 programmer error and cannot be recovered from.
228 class NonConsecutiveIV (Exception):
230 IVs not numbered consecutively. This is a hard error with strict IV
231 checking. Precludes random access to the encrypted objects.
236 class FormatError (Exception):
237 """Unusable parameters in header."""
241 class DecryptionError (Exception):
242 """Error during decryption with ``crypto.py`` on the command line."""
246 class Unreachable (Exception):
248 Makeshift __builtin_unreachable(); always a programmer error if
254 class InternalError (Exception):
255 """Errors not ascribable to bad user inputs or cryptography."""
259 ###############################################################################
260 ## crypto layer version
261 ###############################################################################
263 ENCRYPTION_PARAMETERS = \
265 { "kdf": ("dummy", 16)
266 , "enc": "passthrough" }
274 , "enc": "aes-gcm" } }
276 ###############################################################################
278 ###############################################################################
280 PDTCRYPT_HDR_MAGIC = b"PDTCRYPT"
282 PDTCRYPT_HDR_SIZE_MAGIC = 8 # 8
283 PDTCRYPT_HDR_SIZE_VERSION = 2 # 10
284 PDTCRYPT_HDR_SIZE_PARAMVERSION = 2 # 12
285 PDTCRYPT_HDR_SIZE_NACL = 16 # 28
286 PDTCRYPT_HDR_SIZE_IV = 12 # 40
287 PDTCRYPT_HDR_SIZE_CTSIZE = 8 # 48
288 PDTCRYPT_HDR_SIZE_TAG = 16 # 64 GCM auth tag
290 PDTCRYPT_HDR_SIZE = PDTCRYPT_HDR_SIZE_MAGIC + PDTCRYPT_HDR_SIZE_VERSION \
291 + PDTCRYPT_HDR_SIZE_PARAMVERSION + PDTCRYPT_HDR_SIZE_NACL \
292 + PDTCRYPT_HDR_SIZE_IV + PDTCRYPT_HDR_SIZE_CTSIZE \
293 + PDTCRYPT_HDR_SIZE_TAG # = 64
295 # precalculate offsets since Python can’t do constant folding over names
296 HDR_OFF_VERSION = PDTCRYPT_HDR_SIZE_MAGIC
297 HDR_OFF_PARAMVERSION = HDR_OFF_VERSION + PDTCRYPT_HDR_SIZE_VERSION
298 HDR_OFF_NACL = HDR_OFF_PARAMVERSION + PDTCRYPT_HDR_SIZE_PARAMVERSION
299 HDR_OFF_IV = HDR_OFF_NACL + PDTCRYPT_HDR_SIZE_NACL
300 HDR_OFF_CTSIZE = HDR_OFF_IV + PDTCRYPT_HDR_SIZE_IV
301 HDR_OFF_TAG = HDR_OFF_CTSIZE + PDTCRYPT_HDR_SIZE_CTSIZE
305 FMT_I2N_IV = "<8sL" # 8 random bytes ‖ 32 bit counter
306 FMT_I2N_HDR = ("<" # host byte order
310 "16s" # sodium chloride
316 AES_KEY_SIZE = 16 # b"0123456789abcdef"
317 AES_KEY_SIZE_B64 = 24 # b'MDEyMzQ1Njc4OWFiY2RlZg=='
319 AES_GCM_MAX_SIZE = (1 << 36) - (1 << 5) # 2^39 - 2^8 b ≅ 64 GB.
320 # Source: NIST SP 800-38D section 5.2.1.1
321 # https://crypto.stackexchange.com/questions/31793/plain-text-size-limits-for-aes-gcm-mode-just-64gb
323 PDTCRYPT_MAX_OBJ_SIZE_DEFAULT = 63 * (1 << 30) # 63 GB
324 PDTCRYPT_MAX_OBJ_SIZE = PDTCRYPT_MAX_OBJ_SIZE_DEFAULT
326 # index and info files are written on-the fly while encrypting so their
327 # counters must be available in advance
328 AES_GCM_IV_CNT_INFOFILE = 1 # constant
329 AES_GCM_IV_CNT_INDEX = AES_GCM_IV_CNT_INFOFILE + 1
330 AES_GCM_IV_CNT_DATA = AES_GCM_IV_CNT_INDEX + 1 # also for multivolume
331 AES_GCM_IV_CNT_MAX_DEFAULT = 0xffFFffFF
332 AES_GCM_IV_CNT_MAX = AES_GCM_IV_CNT_MAX_DEFAULT
334 # IV structure and generation
335 PDTCRYPT_IV_GEN_MAX_RETRIES = 10 # ×
336 PDTCRYPT_IV_FIXEDPART_SIZE = 8 # B
337 PDTCRYPT_IV_COUNTER_SIZE = 4 # B
339 # secret type: PW of string | KEY of char [16]
340 PDTCRYPT_SECRET_PW = 0
341 PDTCRYPT_SECRET_KEY = 1
343 ###############################################################################
345 ###############################################################################
351 # , paramversion : u16
357 # fn hdr_read (f : handle) -> hdrinfo;
358 # fn hdr_make (f : handle, h : hdrinfo) -> IOResult<usize>;
359 # fn hdr_fmt (h : hdrinfo) -> String;
364 Read bytes as header structure.
366 If the input could not be interpreted as a header, fail with
371 mag, version, paramversion, nacl, iv, ctsize, tag = \
372 struct.unpack (FMT_I2N_HDR, data)
373 except Exception as exn:
374 raise InvalidHeader ("error unpacking header from [%r]: %s"
375 % (binascii.hexlify (data), str (exn)))
377 if mag != PDTCRYPT_HDR_MAGIC:
378 raise InvalidHeader ("bad magic in header: expected [%s], got [%s]"
379 % (PDTCRYPT_HDR_MAGIC, mag))
382 { "version" : version
383 , "paramversion" : paramversion
391 def hdr_read_stream (instr):
393 Read header from stream at the current position.
395 Fail with ``InvalidHeader`` if insufficient bytes were read from the
396 stream, or if the content could not be interpreted as a header.
398 data = instr.read(PDTCRYPT_HDR_SIZE)
402 elif ldata != PDTCRYPT_HDR_SIZE:
403 raise InvalidHeader ("hdr_read_stream: expected %d B, received %d B"
404 % (PDTCRYPT_HDR_SIZE, ldata))
405 return hdr_read (data)
408 def hdr_from_params (version, paramversion, nacl, iv, ctsize, tag):
410 Assemble the necessary values into a PDTCRYPT header.
412 :type version: int to fit uint16_t
413 :type paramversion: int to fit uint16_t
414 :type nacl: bytes to fit uint8_t[16]
415 :type iv: bytes to fit uint8_t[12]
416 :type size: int to fit uint64_t
417 :type tag: bytes to fit uint8_t[16]
419 buf = bytearray (PDTCRYPT_HDR_SIZE)
420 bufv = memoryview (buf)
423 struct.pack_into (FMT_I2N_HDR, bufv, 0,
425 version, paramversion, nacl, iv, ctsize, tag)
426 except Exception as exn:
427 return False, "error assembling header: %s" % str (exn)
429 return True, bytes (buf)
432 def hdr_make_dummy (s):
434 Create a header sized block of bytes initialized to a value derived from a
435 string. Used to verify we’ve jumped back correctly to the actual position
436 of the object header.
438 c = reduce (lambda a, c: a + ord(c), s, 0) % 0xFF
439 return bytes (bytearray (struct.pack ("B", c)) * PDTCRYPT_HDR_SIZE)
444 Assemble a header from the given header structure.
446 return hdr_from_params (version=hdr.get("version"),
447 paramversion=hdr.get("paramversion"),
448 nacl=hdr.get("nacl"), iv=hdr.get("iv"),
449 ctsize=hdr.get("ctsize"), tag=hdr.get("tag"))
452 HDR_FMT = "I2n_header { version: %d, paramversion: %d, nacl: %s[%d]," \
453 " iv: %s[%d], ctsize: %d, tag: %s[%d] }"
456 """Format a header structure into readable output."""
457 return HDR_FMT % (h["version"], h["paramversion"],
458 binascii.hexlify (h["nacl"]), len(h["nacl"]),
459 binascii.hexlify (h["iv"]), len(h["iv"]),
461 binascii.hexlify (h["tag"]), len(h["tag"]))
464 def hex_spaced_of_bytes (b):
465 """Format bytes object, hexdump style."""
466 return " ".join ([ "%.2x%.2x" % (c1, c2)
467 for c1, c2 in zip (b[0::2], b[1::2]) ]) \
468 + (len (b) | 1 == len (b) and " %.2x" % b[-1] or "") # odd lengths
471 def hdr_iv_counter (h):
472 """Extract the variable part of the IV of the given header."""
473 _fixed, cnt = struct.unpack (FMT_I2N_IV, h ["iv"])
477 def hdr_iv_fixed (h):
478 """Extract the fixed part of the IV of the given header."""
479 fixed, _cnt = struct.unpack (FMT_I2N_IV, h ["iv"])
483 hdr_dump = hex_spaced_of_bytes
487 """version = %-4d : %s
488 paramversion = %-4d : %s
495 def hdr_fmt_pretty (h):
497 Format header structure into multi-line representation of its contents and
498 their raw representation. (Omit the implicit “PDTCRYPT” magic bytes that
499 precede every header.)
501 return HDR_FMT_PRETTY \
503 hex_spaced_of_bytes (struct.pack (FMT_UINT16_LE, h["version"])),
505 hex_spaced_of_bytes (struct.pack (FMT_UINT16_LE, h["paramversion"])),
506 hex_spaced_of_bytes (h["nacl"]),
507 hex_spaced_of_bytes (h["iv"]),
509 hex_spaced_of_bytes (struct.pack (FMT_UINT64_LE, h["ctsize"])),
510 hex_spaced_of_bytes (h["tag"]))
512 IV_FMT = "((f %s) (c %d))"
515 """Format the two components of an IV in a readable fashion."""
516 fixed, cnt = struct.unpack (FMT_I2N_IV, iv)
517 return IV_FMT % (binascii.hexlify (fixed), cnt)
520 ###############################################################################
522 ###############################################################################
524 class Location (object):
528 def restore_loc_fmt (loc):
530 % (loc.n, loc.offset)
532 def locate_hdr_candidates (fd):
534 Walk over instances of the magic string in the payload, collecting their
535 positions. If the offset of the first found instance is not zero, the file
536 begins with leading garbage. Used by desaster recovery.
538 :return: The list of offsets in the file.
542 mm = mmap.mmap(fd, 0, mmap.MAP_SHARED, mmap.PROT_READ)
545 pos = mm.find (PDTCRYPT_HDR_MAGIC, pos)
554 HDR_CAND_GOOD = 0 # header marks begin of valid object
555 HDR_CAND_FISHY = 1 # inconclusive (tag mismatch, obj overlap etc.)
556 HDR_CAND_JUNK = 2 # not a header / object unreadable
559 { HDR_CAND_GOOD : "valid"
560 , HDR_CAND_FISHY : "fishy"
561 , HDR_CAND_JUNK : "junk"
565 def verdict_fmt (vdt):
566 return HDR_VERDICT_NAME [vdt]
569 def inspect_hdr (fd, off):
571 Attempt to parse a header in *fd* at position *off*.
573 Returns a verdict about the quality of that header plus the parsed header
577 _ = os.lseek (fd, off, os.SEEK_SET)
579 if os.lseek (fd, 0, os.SEEK_CUR) != off:
580 if PDTCRYPT_VERBOSE is True:
581 noise ("PDT: %d → dismissed (lseek() past EOF)" % off)
582 return HDR_CAND_JUNK, None
584 raw = os.read (fd, PDTCRYPT_HDR_SIZE)
585 if len (raw) != PDTCRYPT_HDR_SIZE:
586 if PDTCRYPT_VERBOSE is True:
587 noise ("PDT: %d → dismissed (EOF inside header)" % off)
588 return HDR_CAND_JUNK, None
592 except InvalidHeader as exn:
593 if PDTCRYPT_VERBOSE is True:
594 noise ("PDT: %d → dismissed (invalid: [%s])" % (off, str (exn)))
595 return HDR_CAND_JUNK, None
597 obj0 = off + PDTCRYPT_HDR_SIZE
598 objX = obj0 + hdr ["ctsize"]
600 eof = os.lseek (fd, 0, os.SEEK_END)
602 if PDTCRYPT_VERBOSE is True:
603 noise ("PDT: %d → EOF inside object (%d≤%d≤%d); adjusting size to "
604 "%d" % (off, obj0, eof, objX, (eof - obj0)))
605 # try reading up to the end
606 hdr ["ctsize"] = eof - obj0
607 return HDR_CAND_FISHY, hdr
609 return HDR_CAND_GOOD, hdr
612 def try_decrypt (ifd, off, hdr, secret, ofd=-1):
614 Attempt to decrypt the object in the (seekable) descriptor *ifd* starting
615 at *off* using the metadata in *hdr* and *secret*. An output fd can be
616 specified with *ofd*; if it is *-1* – the default –, the decrypted payload
619 Always creates a fresh decryptor, so validation steps across objects don’t
622 Errors during GCM tag validation are ignored. Used by desaster recovery.
624 ctleft = hdr ["ctsize"]
628 if ks == PDTCRYPT_SECRET_PW:
629 decr = Decrypt (password=secret [1])
630 elif ks == PDTCRYPT_SECRET_KEY:
632 decr = Decrypt (key=key)
639 os.lseek (ifd, pos, os.SEEK_SET)
642 cnksiz = min (ctleft, PDTCRYPT_BLOCKSIZE)
643 cnk = os.read (ifd, cnksiz)
646 pt = decr.process (cnk)
651 except InvalidGCMTag:
652 noise ("PDT: GCM tag mismatch for object %d–%d"
653 % (off, off + hdr ["ctsize"]))
654 if len (pt) > 0 and ofd != -1:
657 except Exception as exn:
658 noise ("PDT: error decrypting object %d–%d@%d, %d B remaining [%s]"
659 % (off, off + hdr ["ctsize"], pos, ctleft, exn))
665 def readable_objects_offsets (ifd, secret, cands):
667 From a list of candidates, locate the ones that mark the start of actual
668 readable PDTCRYPT objects.
672 for i, cand in enumerate (cands):
673 vdt, hdr = inspect_hdr (ifd, cand)
674 if vdt == HDR_CAND_JUNK:
675 pass # ignore unreadable ones
676 elif vdt in [HDR_CAND_GOOD, HDR_CAND_FISHY]:
677 ctsize = hdr ["ctsize"]
678 off0 = cand + PDTCRYPT_HDR_SIZE
679 ok = try_decrypt (ifd, off0, hdr, secret) == ctsize
681 good.append ((cand, off0 + ctsize))
683 overlap = find_overlaps (good)
685 return [ g [0] for g in good ]
688 def reconstruct_offsets (fname, secret):
689 ifd = os.open (fname, os.O_RDONLY)
692 cands = locate_hdr_candidates (ifd)
693 return readable_objects_offsets (ifd, secret, cands)
698 ###############################################################################
700 ###############################################################################
702 def make_secret (password=None, key=None):
704 Safely create a “secret” value that consists either of a key or a password.
705 Inputs are validated: the password is accepted as (UTF-8 encoded) bytes or
706 string; for the key only a bytes object of the proper size or a base64
707 encoded string thereof is accepted.
709 If both are provided, the key is preferred over the password; no checks are
710 performed whether the key is derived from the password.
712 :returns: secret value if inputs were acceptable | None otherwise.
715 if isinstance (key, str) is True:
716 key = key.encode ("utf-8")
717 if isinstance (key, bytes) is True:
718 if len (key) == AES_KEY_SIZE:
719 return (PDTCRYPT_SECRET_KEY, key)
720 if len (key) == AES_KEY_SIZE * 2:
722 key = binascii.unhexlify (key)
723 return (PDTCRYPT_SECRET_KEY, key)
724 except binascii.Error: # garbage in string
726 if len (key) == AES_KEY_SIZE_B64:
728 key = base64.b64decode (key)
729 # the base64 processor is very tolerant and allows for
730 # arbitrary trailing and leading data thus the data obtained
731 # must be checked for the proper length
732 if len (key) == AES_KEY_SIZE:
733 return (PDTCRYPT_SECRET_KEY, key)
734 except binascii.Error: # “incorrect padding”
736 elif password is not None:
737 if isinstance (password, str) is True:
738 return (PDTCRYPT_SECRET_PW, password)
739 elif isinstance (password, bytes) is True:
741 password = password.decode ("utf-8")
742 return (PDTCRYPT_SECRET_PW, password)
743 except UnicodeDecodeError:
749 ###############################################################################
750 ## passthrough / null encryption
751 ###############################################################################
753 class PassthroughCipher (object):
755 tag = struct.pack ("<QQ", 0, 0)
757 def __init__ (self) : pass
759 def update (self, b) : return b
761 def finalize (self) : return b""
763 def finalize_with_tag (self, _) : return b""
765 ###############################################################################
766 ## convenience wrapper
767 ###############################################################################
770 def kdf_dummy (klen, password, _nacl):
772 Fake KDF for testing purposes that is called when parameter version zero is
775 q, r = divmod (klen, len (password))
776 if isinstance (password, bytes) is False:
777 password = password.encode ()
778 return password * q + password [:r], b""
781 SCRYPT_KEY_MEMO = { } # static because needed for both the info file and the archive
784 def kdf_scrypt (params, password, nacl):
786 Wrapper for the Scrypt KDF, corresponds to parameter version one. The
787 computation result is memoized based on the inputs to facilitate spawning
788 multiple encryption contexts.
793 dkLen = params["dkLen"]
796 nacl = os.urandom (params["NaCl_LEN"])
798 key_parms = (password, nacl, N, r, p, dkLen)
799 global SCRYPT_KEY_MEMO
800 if key_parms not in SCRYPT_KEY_MEMO:
801 SCRYPT_KEY_MEMO [key_parms] = \
802 pylibscrypt.scrypt (password, nacl, N, r, p, dkLen)
803 return SCRYPT_KEY_MEMO [key_parms], nacl
806 def kdf_by_version (paramversion=None, defs=None):
808 Pick the KDF handler corresponding to the parameter version or the
811 :rtype: function (password : str, nacl : str) -> str
813 if paramversion is not None:
814 defs = ENCRYPTION_PARAMETERS.get(paramversion, None)
816 raise InvalidParameter ("no encryption parameters for version %r"
818 (kdf, params) = defs["kdf"]
820 if kdf == "scrypt" : fn = kdf_scrypt
821 if kdf == "dummy" : fn = kdf_dummy
823 raise ValueError ("key derivation method %r unknown" % kdf)
824 return partial (fn, params)
827 ###############################################################################
829 ###############################################################################
831 def scrypt_hashsource (pw, ins):
833 Calculate the SCRYPT hash from the password and the information contained
834 in the first header found in ``ins``.
836 This does not validate whether the first object is encrypted correctly.
838 if isinstance (pw, str) is True:
840 elif isinstance (pw, bytes) is False:
841 raise InvalidParameter ("password must be a string, not %s"
843 if isinstance (ins, io.BufferedReader) is False and \
844 isinstance (ins, io.FileIO) is False:
845 raise InvalidParameter ("file to hash must be opened in “binary” mode")
848 hdr = hdr_read_stream (ins)
849 except EndOfFile as exn:
850 noise ("PDT: malformed input: end of file reading first object header")
855 pver = hdr ["paramversion"]
856 if PDTCRYPT_VERBOSE is True:
857 noise ("PDT: salt of first object : %s" % binascii.hexlify (nacl))
858 noise ("PDT: parameter version of archive : %d" % pver)
861 defs = ENCRYPTION_PARAMETERS.get(pver, None)
862 kdfname, params = defs ["kdf"]
863 if kdfname != "scrypt":
864 noise ("PDT: input is not an SCRYPT archive")
867 kdf = kdf_by_version (None, defs)
868 except ValueError as exn:
869 noise ("PDT: object has unknown parameter version %d" % pver)
871 hsh, _void = kdf (pw, nacl)
873 return hsh, nacl, hdr ["version"], pver
876 def scrypt_hashfile (pw, fname):
878 Calculate the SCRYPT hash from the password and the information contained
879 in the first header found in the given file. The header is read only at
882 with deptdcrypt_mk_stream (PDTCRYPT_SOURCE, fname or "-") as ins:
883 hsh, _void, _void, _void = scrypt_hashsource (pw, ins)
887 ###############################################################################
889 ###############################################################################
891 class Crypto (object):
893 Encryption context to remain alive throughout an entire tarfile pass.
898 cnt = None # file counter (uint32_t != 0)
899 iv = None # current IV
900 fixed = None # accu for 64 bit fixed parts of IV
901 used_ivs = None # tracks IVs
902 strict_ivs = False # if True, panic on duplicate object IV
911 info_counter_used = False
912 index_counter_used = False
914 def __init__ (self, *al, **akv):
915 self.used_ivs = set ()
916 self.set_parameters (*al, **akv)
919 def next_fixed (self):
924 def set_object_counter (self, cnt=None):
926 Safely set the internal counter of encrypted objects. Numerous
929 The same counter may not be reused in combination with one IV fixed
930 part. This is validated elsewhere in the IV handling.
932 Counter zero is invalid. The first two counters are reserved for
933 metadata. The implementation does not allow for splitting metadata
934 files over multiple encrypted objects. (This would be possible by
935 assigning new fixed parts.) Thus in a Deltatar backup there is at most
936 one object with a counter value of one and two. On creation of a
937 context, the initial counter may be chosen. The globals
938 ``AES_GCM_IV_CNT_INFOFILE`` and ``AES_GCM_IV_CNT_INDEX`` can be used to
939 request one of the reserved values. If one of these values has been
940 used, any further attempt of setting the counter to that value will
941 be rejected with an ``InvalidFileCounter`` exception.
943 Out of bounds values (i. e. below one and more than the maximum of 2³²)
944 cause an ``InvalidParameter`` exception to be thrown.
947 self.cnt = AES_GCM_IV_CNT_DATA
949 if cnt == 0 or cnt > AES_GCM_IV_CNT_MAX + 1:
950 raise InvalidParameter ("invalid counter value %d requested: "
951 "acceptable values are from 1 to %d"
952 % (cnt, AES_GCM_IV_CNT_MAX))
953 if cnt == AES_GCM_IV_CNT_INFOFILE:
954 if self.info_counter_used is True:
955 raise InvalidFileCounter ("attempted to reuse info file "
956 "counter %d: must be unique" % cnt)
957 self.info_counter_used = True
958 elif cnt == AES_GCM_IV_CNT_INDEX:
959 if self.index_counter_used is True:
960 raise InvalidFileCounter ("attempted to reuse index file "
961 " counter %d: must be unique" % cnt)
962 self.index_counter_used = True
963 if cnt <= AES_GCM_IV_CNT_MAX:
966 # cnt == AES_GCM_IV_CNT_MAX + 1 → wrap
967 self.cnt = AES_GCM_IV_CNT_DATA
971 def set_parameters (self, password=None, key=None, paramversion=None,
972 nacl=None, counter=None, strict_ivs=False):
974 Configure the internal state of a crypto context. Not intended for
978 self.set_object_counter (counter)
979 self.strict_ivs = strict_ivs
981 if paramversion is not None:
982 self.paramversion = paramversion
985 self.key, self.nacl = key, nacl
988 if password is not None:
989 if isinstance (password, bytes) is False:
990 password = str.encode (password)
991 self.password = password
992 if paramversion is None and nacl is None:
993 # postpone key setup until first header is available
995 kdf = kdf_by_version (paramversion)
997 self.key, self.nacl = kdf (password, nacl)
1000 def process (self, buf):
1002 Encrypt / decrypt a buffer. Invokes the ``.update()`` method on the
1003 wrapped encryptor or decryptor, respectively.
1005 The Cryptography exception ``AlreadyFinalized`` is translated to an
1006 ``InternalError`` at this point. It may occur in sound code when the GC
1007 closes an encrypting stream after an error. Everywhere else it must be
1010 if self.enc is None:
1011 raise RuntimeError ("process: context not initialized")
1012 self.stats ["in"] += len (buf)
1014 out = self.enc.update (buf)
1015 except cryptography.exceptions.AlreadyFinalized as exn:
1016 raise InternalError (exn)
1017 self.stats ["out"] += len (out)
1021 def next (self, password, paramversion, nacl, iv):
1023 Prepare for encrypting another object: Reset the data counters and
1024 change the configuration in case one of the variable parameters differs
1025 from the last object. Also check the IV for duplicates and error out
1026 if strict checking was requested.
1030 self.stats ["obj"] += 1
1032 self.check_duplicate_iv (iv)
1034 if ( self.paramversion != paramversion
1035 or self.password != password
1036 or self.nacl != nacl):
1037 self.set_parameters (password=password, paramversion=paramversion,
1038 nacl=nacl, strict_ivs=self.strict_ivs)
1041 def check_duplicate_iv (self, iv):
1043 Add an IV (the 12 byte representation as in the header) to the list. With
1044 strict checking enabled, this will throw a ``DuplicateIV``. Depending on
1045 the context, this may indicate a serious error (IV reuse).
1047 if self.strict_ivs is True and iv in self.used_ivs:
1048 raise DuplicateIV ("iv %s was reused" % iv_fmt (iv))
1049 # vi has not been used before; add to collection
1050 self.used_ivs.add (iv)
1053 def counters (self):
1055 Access the data counters.
1057 return self.stats ["obj"], self.stats ["in"], self.stats ["out"]
1062 Clear the current context regardless of its finalization state. The
1063 next operation must be ``.next()``.
1068 class Encrypt (Crypto):
1074 def __init__ (self, version, paramversion, password=None, key=None, nacl=None,
1075 counter=AES_GCM_IV_CNT_DATA, strict_ivs=True):
1077 The ctor will throw immediately if one of the parameters does not conform
1078 to our expectations.
1080 counter=AES_GCM_IV_CNT_DATA, strict_ivs=True):
1081 :type version: int to fit uint16_t
1082 :type paramversion: int to fit uint16_t
1083 :param password: mutually exclusive with ``key``
1084 :type password: bytes
1085 :param key: mutually exclusive with ``password``
1088 :type counter: initial object counter the values
1089 ``AES_GCM_IV_CNT_INFOFILE`` and
1090 ``AES_GCM_IV_CNT_INDEX`` are unique in each backup set
1091 and cannot be reused even with different fixed parts.
1092 :type strict_ivs: bool
1094 if password is None and key is None \
1095 or password is not None and key is not None :
1096 raise InvalidParameter ("__init__: need either key or password")
1099 if isinstance (key, bytes) is False:
1100 raise InvalidParameter ("__init__: key must be provided as "
1101 "bytes, not %s" % type (key))
1103 raise InvalidParameter ("__init__: salt must be provided along "
1104 "with encryption key")
1105 else: # password, no key
1106 if isinstance (password, str) is False:
1107 raise InvalidParameter ("__init__: password must be a string, not %s"
1109 if len (password) == 0:
1110 raise InvalidParameter ("__init__: supplied empty password but not "
1111 "permitted for PDT encrypted files")
1113 if isinstance (version, int) is False:
1114 raise InvalidParameter ("__init__: version number must be an "
1115 "integer, not %s" % type (version))
1117 raise InvalidParameter ("__init__: version number must be a "
1118 "nonnegative integer, not %d" % version)
1120 if isinstance (paramversion, int) is False:
1121 raise InvalidParameter ("__init__: crypto parameter version number "
1122 "must be an integer, not %s"
1123 % type (paramversion))
1124 if paramversion < 0:
1125 raise InvalidParameter ("__init__: crypto parameter version number "
1126 "must be a nonnegative integer, not %d"
1129 if nacl is not None:
1130 if isinstance (nacl, bytes) is False:
1131 raise InvalidParameter ("__init__: salt given, but of type %s "
1132 "instead of bytes" % type (nacl))
1133 # salt length would depend on the actual encryption so it can’t be
1134 # validated at this point
1136 self.version = version
1137 self.paramenc = ENCRYPTION_PARAMETERS.get (paramversion) ["enc"]
1139 super().__init__ (password, key, paramversion, nacl, counter=counter,
1140 strict_ivs=strict_ivs)
1143 def next_fixed (self, retries=PDTCRYPT_IV_GEN_MAX_RETRIES):
1145 Generate the next IV fixed part by reading eight bytes from
1146 ``/dev/urandom``. The buffer so obtained is tested against the fixed
1147 parts used so far to prevent accidental reuse of IVs. After a
1148 configurable number of attempts to create a unique fixed part, it will
1149 refuse to continue with an ``IVFixedPartError``. This is unlikely to
1150 ever happen on a normal system but may detect an issue with the random
1153 The list of fixed parts that were used by the context at hand can be
1154 accessed through the ``.fixed`` list. Its last element is the fixed
1155 part currently in use.
1159 fp = os.urandom (PDTCRYPT_IV_FIXEDPART_SIZE)
1160 if fp not in self.fixed:
1161 self.fixed.append (fp)
1164 raise IVFixedPartError ("error obtaining a unique IV fixed part from "
1165 "/dev/urandom; giving up after %d tries" % i)
1170 Construct a 12-bytes IV from the current fixed part and the object
1173 return struct.pack(FMT_I2N_IV, self.fixed [-1], self.cnt)
1176 def next (self, filename=None, counter=None):
1178 Prepare for encrypting the next incoming object. Update the counter
1179 and put together the IV, possibly changing prefixes. Then create the
1182 The argument ``counter`` can be used to specify a file counter for this
1183 object. Unless it is one of the reserved values, the counter of
1184 subsequent objects will be computed from this one.
1186 If this is the first object in a series, ``filename`` is required,
1187 otherwise it is reused if not present. The value is used to derive a
1188 header sized placeholder to use until after encryption when all the
1189 inputs to construct the final header are available. This is then
1190 matched in ``.done()`` against the value found at the position of the
1191 header. The motivation for this extra check is primarily to assist
1192 format debugging: It makes stray headers easy to spot in malformed
1195 if filename is None:
1196 if self.lastinfo is None:
1197 raise InvalidParameter ("next: filename is mandatory for "
1199 filename, _dummy = self.lastinfo
1201 if isinstance (filename, str) is False:
1202 raise InvalidParameter ("next: filename must be a string, no %s"
1204 if counter is not None:
1205 if isinstance (counter, int) is False:
1206 raise InvalidParameter ("next: the supplied counter is of "
1207 "invalid type %s; please pass an "
1208 "integer instead" % type (counter))
1209 self.set_object_counter (counter)
1211 self.iv = self.iv_make ()
1212 if self.paramenc == "aes-gcm":
1214 ( algorithms.AES (self.key)
1215 , modes.GCM (self.iv)
1216 , backend = default_backend ()) \
1218 elif self.paramenc == "passthrough":
1219 self.enc = PassthroughCipher ()
1221 raise InvalidParameter ("next: parameter version %d not known"
1222 % self.paramversion)
1223 hdrdum = hdr_make_dummy (filename)
1224 self.lastinfo = (filename, hdrdum)
1225 super().next (self.password, self.paramversion, self.nacl, self.iv)
1227 self.set_object_counter (self.cnt + 1)
1231 def done (self, cmpdata):
1233 Complete encryption of an object. After this has been called, attempts
1234 of encrypting further data will cause an error until ``.next()`` is
1237 Returns a 64 bytes buffer containing the object header including all
1238 values including the “late” ones e. g. the ciphertext size and the
1241 if isinstance (cmpdata, bytes) is False:
1242 raise InvalidParameter ("done: comparison input expected as bytes, "
1243 "not %s" % type (cmpdata))
1244 if self.lastinfo is None:
1245 raise RuntimeError ("done: encryption context not initialized")
1246 filename, hdrdum = self.lastinfo
1247 if cmpdata != hdrdum:
1248 raise RuntimeError ("done: bad sync of header for object %d: "
1249 "preliminary data does not match; this likely "
1250 "indicates a wrongly repositioned stream"
1252 data = self.enc.finalize ()
1253 self.stats ["out"] += len (data)
1254 self.ctsize += len (data)
1255 ok, hdr = hdr_from_params (self.version, self.paramversion, self.nacl,
1256 self.iv, self.ctsize, self.enc.tag)
1258 raise InternalError ("error constructing header: %r" % hdr)
1259 return data, hdr, self.fixed
1262 def process (self, buf):
1264 Encrypt a chunk of plaintext with the active encryptor. Returns the
1265 size of the input consumed. This **must** be checked downstream. If the
1266 maximum possible object size has been reached, the current context must
1267 be finalized and a new one established before any further data can be
1268 encrypted. The second argument is the remainder of the plaintext that
1269 was not encrypted for the caller to use immediately after the new
1272 if isinstance (buf, bytes) is False:
1273 raise InvalidParameter ("process: expected byte buffer, not %s"
1276 newptsize = self.ptsize + bsize
1277 diff = newptsize - PDTCRYPT_MAX_OBJ_SIZE
1280 newptsize = PDTCRYPT_MAX_OBJ_SIZE
1281 self.ptsize = newptsize
1282 data = super().process (buf [:bsize])
1283 self.ctsize += len (data)
1287 class Decrypt (Crypto):
1289 tag = None # GCM tag, part of header
1290 last_iv = None # check consecutive ivs in strict mode
1292 def __init__ (self, password=None, key=None, counter=None, fixedparts=None,
1295 Sanitizing ctor for the decryption context. ``fixedparts`` specifies a
1296 list of IV fixed parts accepted during decryption. If a fixed part is
1297 encountered that is not in the list, decryption will fail.
1299 :param password: mutually exclusive with ``key``
1300 :type password: bytes
1301 :param key: mutually exclusive with ``password``
1303 :type counter: initial object counter the values
1304 ``AES_GCM_IV_CNT_INFOFILE`` and
1305 ``AES_GCM_IV_CNT_INDEX`` are unique in each backup set
1306 and cannot be reused even with different fixed parts.
1307 :type fixedparts: bytes list
1309 if password is None and key is None \
1310 or password is not None and key is not None :
1311 raise InvalidParameter ("__init__: need either key or password")
1314 if isinstance (key, bytes) is False:
1315 raise InvalidParameter ("__init__: key must be provided as "
1316 "bytes, not %s" % type (key))
1317 else: # password, no key
1318 if isinstance (password, str) is False:
1319 raise InvalidParameter ("__init__: password must be a string, not %s"
1321 if len (password) == 0:
1322 raise InvalidParameter ("__init__: supplied empty password but not "
1323 "permitted for PDT encrypted files")
1325 if fixedparts is not None:
1326 if isinstance (fixedparts, list) is False:
1327 raise InvalidParameter ("__init__: IV fixed parts must be "
1328 "supplied as list, not %s"
1329 % type (fixedparts))
1330 self.fixed = fixedparts
1333 super().__init__ (password=password, key=key, counter=counter,
1334 strict_ivs=strict_ivs)
1337 def valid_fixed_part (self, iv):
1339 Check if a fixed part was already seen.
1341 # check if fixed part is known
1342 fixed, _cnt = struct.unpack (FMT_I2N_IV, iv)
1343 i = bisect.bisect_left (self.fixed, fixed)
1344 return i != len (self.fixed) and self.fixed [i] == fixed
1347 def check_consecutive_iv (self, iv):
1349 Check whether the counter part of the given IV is indeed the successor
1350 of the currently present counter. This should always be the case for
1351 the objects in a well formed PDT archive but should not be enforced
1352 when decrypting out-of-order.
1354 fixed, cnt = struct.unpack (FMT_I2N_IV, iv)
1355 if self.strict_ivs is True \
1356 and self.last_iv is not None \
1357 and self.last_iv [0] == fixed \
1358 and self.last_iv [1] != cnt - 1:
1359 raise NonConsecutiveIV ("iv %s counter not successor of "
1360 "last object (expected %d, found %d)"
1361 % (fixed, iv_fmt (self.last_iv [1]), cnt))
1362 self.last_iv = (fixed, cnt)
1365 def next (self, hdr):
1367 Start decrypting the next object. The PDTCRYPT header for the object
1368 can be given either as already parsed object or as bytes.
1370 if isinstance (hdr, bytes) is True:
1371 hdr = hdr_read (hdr)
1372 elif isinstance (hdr, dict) is False:
1373 # this won’t catch malformed specs though
1374 raise InvalidParameter ("next: wrong type of parameter hdr: "
1375 "expected bytes or spec, got %s"
1378 paramversion = hdr ["paramversion"]
1383 raise InvalidHeader ("next: not a header %r" % hdr)
1385 super().next (self.password, paramversion, nacl, iv)
1386 if self.fixed is not None and self.valid_fixed_part (iv) is False:
1387 raise InvalidIVFixedPart ("iv %s has invalid fixed part"
1389 self.check_consecutive_iv (iv)
1392 defs = ENCRYPTION_PARAMETERS.get (paramversion, None)
1394 raise FormatError ("header contains unknown parameter version %d; "
1395 "maybe the file was created by a more recent "
1396 "version of Deltatar" % paramversion)
1398 if enc == "aes-gcm":
1400 ( algorithms.AES (self.key)
1401 , modes.GCM (iv, tag=self.tag)
1402 , backend = default_backend ()) \
1404 elif enc == "passthrough":
1405 self.enc = PassthroughCipher ()
1407 raise InternalError ("encryption parameter set %d refers to unknown "
1408 "mode %r" % (paramversion, enc))
1409 self.set_object_counter (self.cnt + 1)
1412 def done (self, tag=None):
1414 Stop decryption of the current object and finalize it with the active
1415 context. This will throw an *InvalidGCMTag* exception to indicate that
1416 the authentication tag does not match the data. If the tag is correct,
1417 the rest of the plaintext is returned.
1422 data = self.enc.finalize ()
1424 if isinstance (tag, bytes) is False:
1425 raise InvalidParameter ("done: wrong type of parameter "
1426 "tag: expected bytes, got %s"
1428 data = self.enc.finalize_with_tag (self.tag)
1429 except cryptography.exceptions.InvalidTag:
1430 raise InvalidGCMTag ("done: tag mismatch of object %d: %s "
1431 "rejected by finalize ()"
1432 % (self.cnt, binascii.hexlify (self.tag)))
1433 self.ctsize += len (data)
1434 self.stats ["out"] += len (data)
1438 def process (self, buf):
1440 Decrypt the bytes object *buf* with the active decryptor.
1442 if isinstance (buf, bytes) is False:
1443 raise InvalidParameter ("process: expected byte buffer, not %s"
1445 self.ctsize += len (buf)
1446 data = super().process (buf)
1447 self.ptsize += len (data)
1451 ###############################################################################
1453 ###############################################################################
1455 def _patch_global (glob, vow, n=None):
1457 Adapt upper file counter bound for testing IV logic. Completely unsafe.
1459 assert vow == "I am fully aware that this will void my warranty."
1460 r = globals () [glob]
1462 n = globals () [glob + "_DEFAULT"]
1463 globals () [glob] = n
1466 _testing_set_AES_GCM_IV_CNT_MAX = \
1467 partial (_patch_global, "AES_GCM_IV_CNT_MAX")
1469 _testing_set_PDTCRYPT_MAX_OBJ_SIZE = \
1470 partial (_patch_global, "PDTCRYPT_MAX_OBJ_SIZE")
1472 def open2_dump_file (fname, dir_fd, force=False):
1475 oflags = os.O_CREAT | os.O_WRONLY
1477 oflags |= os.O_TRUNC
1482 outfd = os.open (fname, oflags,
1483 stat.S_IRUSR | stat.S_IWUSR, dir_fd=dir_fd)
1484 except FileExistsError as exn:
1485 noise ("PDT: refusing to overwrite existing file %s" % fname)
1487 raise RuntimeError ("destination file %s already exists" % fname)
1488 if PDTCRYPT_VERBOSE is True:
1489 noise ("PDT: new output file %s (fd=%d)" % (fname, outfd))
1493 ###############################################################################
1494 ## freestanding invocation
1495 ###############################################################################
1497 PDTCRYPT_SUB_PROCESS = 0
1498 PDTCRYPT_SUB_SCRYPT = 1
1499 PDTCRYPT_SUB_SCAN = 2
1502 { "process" : PDTCRYPT_SUB_PROCESS
1503 , "scrypt" : PDTCRYPT_SUB_SCRYPT
1504 , "scan" : PDTCRYPT_SUB_SCAN }
1506 PDTCRYPT_DECRYPT = 1 << 0 # decrypt archive with password
1507 PDTCRYPT_SPLIT = 1 << 1 # split archive into individual objects
1508 PDTCRYPT_HASH = 1 << 2 # output scrypt hash for file and given password
1510 PDTCRYPT_SPLITNAME = "pdtcrypt-object-%d.bin"
1511 PDTCRYPT_RESCUENAME = "pdtcrypt-rescue-object-%0.5d.bin"
1513 PDTCRYPT_VERBOSE = False
1514 PDTCRYPT_STRICTIVS = False
1515 PDTCRYPT_OVERWRITE = False
1516 PDTCRYPT_BLOCKSIZE = 1 << 12
1521 PDTCRYPT_DEFAULT_VER = 1
1522 PDTCRYPT_DEFAULT_PVER = 1
1524 # scrypt hashing output control
1525 PDTCRYPT_SCRYPT_INTRANATOR = 0
1526 PDTCRYPT_SCRYPT_PARAMETERS = 1
1527 PDTCRYPT_SCRYPT_DEFAULT = PDTCRYPT_SCRYPT_INTRANATOR
1529 PDTCRYPT_SCRYPT_FORMAT = \
1530 { "i2n" : PDTCRYPT_SCRYPT_INTRANATOR
1531 , "params" : PDTCRYPT_SCRYPT_PARAMETERS }
1533 PDTCRYPT_TT_COLUMNS = 80 # assume standard terminal
1535 class PDTDecryptionError (Exception):
1536 """Decryption failed."""
1538 class PDTSplitError (Exception):
1539 """Decryption failed."""
1542 def noise (*a, **b):
1543 print (file=sys.stderr, *a, **b)
1546 class PassthroughDecryptor (object):
1548 curhdr = None # write current header on first data write
1550 def __init__ (self):
1551 if PDTCRYPT_VERBOSE is True:
1552 noise ("PDT: no encryption; data passthrough")
1554 def next (self, hdr):
1555 ok, curhdr = hdr_make (hdr)
1557 raise PDTDecryptionError ("bad header %r" % hdr)
1558 self.curhdr = curhdr
1561 if self.curhdr is not None:
1565 def process (self, d):
1566 if self.curhdr is not None:
1572 def depdtcrypt (mode, secret, ins, outs):
1574 Remove PDTCRYPT layer from all objects encrypted with the secret. Used on a
1575 Deltatar backup this will yield a (possibly Gzip compressed) tarball.
1577 ctleft = -1 # length of ciphertext to consume
1578 ctcurrent = 0 # total ciphertext of current object
1579 total_obj = 0 # total number of objects read
1580 total_pt = 0 # total plaintext bytes
1581 total_ct = 0 # total ciphertext bytes
1582 total_read = 0 # total bytes read
1583 outfile = None # Python file object for output
1585 if mode & PDTCRYPT_DECRYPT: # decryptor
1587 if ks == PDTCRYPT_SECRET_PW:
1588 decr = Decrypt (password=secret [1], strict_ivs=PDTCRYPT_STRICTIVS)
1589 elif ks == PDTCRYPT_SECRET_KEY:
1591 decr = Decrypt (key=key, strict_ivs=PDTCRYPT_STRICTIVS)
1593 raise InternalError ("‘%d’ does not specify a valid kind of secret"
1596 decr = PassthroughDecryptor ()
1599 """Dummy for non-split mode: output file does not vary."""
1602 if mode & PDTCRYPT_SPLIT:
1603 def nextout (outfile):
1605 We were passed an fd as outs for accessing the destination
1606 directory where extracted archive components are supposed
1611 if PDTCRYPT_VERBOSE is True:
1612 noise ("PDT: no output file to close at this point")
1614 if PDTCRYPT_VERBOSE is True:
1615 noise ("PDT: release output file %r" % outfile)
1616 # cleanup happens automatically by the GC; the next
1617 # line will error out on account of an invalid fd
1620 assert total_obj > 0
1621 fname = PDTCRYPT_SPLITNAME % total_obj
1623 outfd = open2_dump_file (fname, outs, force=PDTCRYPT_OVERWRITE)
1624 except RuntimeError as exn:
1625 raise PDTSplitError (exn)
1626 return os.fdopen (outfd, "wb", closefd=True)
1630 """ESPIPE is normal on non-seekable stdio stream."""
1633 except OSError as exn:
1634 if exn.errno == errno.ESPIPE:
1637 def out (pt, outfile):
1641 if PDTCRYPT_VERBOSE is True:
1642 noise ("PDT:\t· decrypt plaintext %d B" % (npt))
1644 nn = outfile.write (pt)
1645 except OSError as exn: # probably ENOSPC
1646 raise DecryptionError ("error (%s)" % exn)
1648 raise DecryptionError ("write aborted after %d of %d B" % (nn, npt))
1652 # current object completed; in a valid archive this marks either
1653 # the start of a new header or the end of the input
1654 if ctleft == 0: # current object requires finalization
1655 if PDTCRYPT_VERBOSE is True:
1656 noise ("PDT: %d finalize" % tell (ins))
1659 except InvalidGCMTag as exn:
1660 raise DecryptionError ("error finalizing object %d (%d B): "
1661 "%r" % (total_obj, len (pt), exn)) \
1664 if PDTCRYPT_VERBOSE is True:
1665 noise ("PDT:\t· object validated")
1667 if PDTCRYPT_VERBOSE is True:
1668 noise ("PDT: %d hdr" % tell (ins))
1670 hdr = hdr_read_stream (ins)
1671 total_read += PDTCRYPT_HDR_SIZE
1672 except EndOfFile as exn:
1673 total_read += exn.remainder
1674 if total_ct + total_obj * PDTCRYPT_HDR_SIZE != total_read:
1675 raise PDTDecryptionError ("ciphertext processed (%d B) plus "
1676 "overhead (%d × %d B) does not match "
1677 "the number of bytes read (%d )"
1678 % (total_ct, total_obj, PDTCRYPT_HDR_SIZE,
1680 # the single good exit
1681 return total_read, total_obj, total_ct, total_pt
1682 except InvalidHeader as exn:
1683 raise PDTDecryptionError ("invalid header at position %d in %r "
1684 "(%s)" % (tell (ins), exn, ins))
1685 if PDTCRYPT_VERBOSE is True:
1686 pretty = hdr_fmt_pretty (hdr)
1687 noise (reduce (lambda a, e: (a + "\n" if a else "") + "PDT:\t· " + e,
1688 pretty.splitlines (), ""))
1689 ctcurrent = ctleft = hdr ["ctsize"]
1693 total_obj += 1 # used in file counter with split mode
1695 # finalization complete or skipped in case of first object in
1696 # stream; create a new output file if necessary
1697 outfile = nextout (outfile)
1699 if PDTCRYPT_VERBOSE is True:
1700 noise ("PDT: %d decrypt obj no. %d, %d B"
1701 % (tell (ins), total_obj, ctleft))
1703 # always allocate a new buffer since python-cryptography doesn’t allow
1704 # passing a bytearray :/
1705 nexpect = min (ctleft, PDTCRYPT_BLOCKSIZE)
1706 if PDTCRYPT_VERBOSE is True:
1707 noise ("PDT:\t· [%d] %d%% done, read block (%d B of %d B remaining)"
1709 100 - ctleft * 100 / (ctcurrent > 0 and ctcurrent or 1),
1711 ct = ins.read (nexpect)
1715 raise EndOfFile (nct,
1716 "hit EOF after %d of %d B in block [%d:%d); "
1717 "%d B ciphertext remaining for object no %d"
1718 % (nct, nexpect, off, off + nexpect, ctleft,
1724 if PDTCRYPT_VERBOSE is True:
1725 noise ("PDT:\t· decrypt ciphertext %d B" % (nct))
1726 pt = decr.process (ct)
1730 def deptdcrypt_mk_stream (kind, path):
1731 """Create stream from file or stdio descriptor."""
1732 if kind == PDTCRYPT_SINK:
1734 if PDTCRYPT_VERBOSE is True: noise ("PDT: sink: stdout")
1735 return sys.stdout.buffer
1737 if PDTCRYPT_VERBOSE is True: noise ("PDT: sink: file %s" % path)
1738 return io.FileIO (path, "w")
1739 if kind == PDTCRYPT_SOURCE:
1741 if PDTCRYPT_VERBOSE is True: noise ("PDT: source: stdin")
1742 return sys.stdin.buffer
1744 if PDTCRYPT_VERBOSE is True: noise ("PDT: source: file %s" % path)
1745 return io.FileIO (path, "r")
1747 raise ValueError ("bogus stream “%s” / %s" % (kind, path))
1750 def mode_depdtcrypt (mode, secret, ins, outs):
1752 total_read, total_obj, total_ct, total_pt = \
1753 depdtcrypt (mode, secret, ins, outs)
1754 except DecryptionError as exn:
1755 noise ("PDT: Decryption failed:")
1757 noise ("PDT: “%s”" % exn)
1759 noise ("PDT: Did you specify the correct key / password?")
1762 except PDTSplitError as exn:
1763 noise ("PDT: Split operation failed:")
1765 noise ("PDT: “%s”" % exn)
1767 noise ("PDT: Hint: target directory should be empty.")
1771 if PDTCRYPT_VERBOSE is True:
1772 noise ("PDT: decryption successful" )
1773 noise ("PDT: %.10d bytes read" % total_read)
1774 noise ("PDT: %.10d objects decrypted" % total_obj )
1775 noise ("PDT: %.10d bytes ciphertext" % total_ct )
1776 noise ("PDT: %.10d bytes plaintext" % total_pt )
1782 def mode_scrypt (pw, ins=None, nacl=None, fmt=PDTCRYPT_SCRYPT_INTRANATOR):
1784 paramversion = PDTCRYPT_DEFAULT_PVER
1786 hsh, nacl, version, paramversion = scrypt_hashsource (pw, ins)
1787 defs = ENCRYPTION_PARAMETERS.get(paramversion, None)
1789 nacl = binascii.unhexlify (nacl)
1790 defs = ENCRYPTION_PARAMETERS.get(paramversion, None)
1791 version = PDTCRYPT_DEFAULT_VER
1793 kdfname, params = defs ["kdf"]
1795 kdf = kdf_by_version (None, defs)
1796 hsh, _void = kdf (pw, nacl)
1800 if fmt == PDTCRYPT_SCRYPT_INTRANATOR:
1801 out = json.dumps ({ "salt" : base64.b64encode (nacl).decode ()
1802 , "key" : base64.b64encode (hsh) .decode ()
1803 , "paramversion" : paramversion })
1804 elif fmt == PDTCRYPT_SCRYPT_PARAMETERS:
1805 out = json.dumps ({ "salt" : binascii.hexlify (nacl).decode ()
1806 , "key" : binascii.hexlify (hsh) .decode ()
1807 , "version" : version
1808 , "scrypt_params" : { "N" : params ["N"]
1809 , "r" : params ["r"]
1810 , "p" : params ["p"]
1811 , "dkLen" : params ["dkLen"] } })
1813 raise RuntimeError ("bad scrypt output scheme %r" % fmt)
1818 def noise_output_candidates (cands, indent=8, cols=PDTCRYPT_TT_COLUMNS):
1820 Print a list of offsets without garbling the terminal too much.
1822 The indent is counted from column zero; if it is wide enough, the “PDT: ”
1823 marker will be prepended, considered part of the indentation.
1827 idt = " " * indent if indent < 5 else "PDT: " + " " * (indent - 5)
1832 init = True # prevent leading separator
1835 raise ValueError ("the requested indentation exceeds the line "
1836 "width by %d" % (indent - wd))
1846 if lpos > wd: # line break
1862 SLICE_START = 1 # ordering is important to have starts of intervals
1863 SLICE_END = 0 # sorted before equal ends
1865 def find_overlaps (slices):
1867 Find overlapping slices: iterate open/close points of intervals, tracking
1868 the ones open at any time.
1871 inside = set () # of indices into bounds
1872 ovrlp = set () # of indices into bounds
1874 for i, s in enumerate (slices):
1875 bounds.append ((s [0], SLICE_START, i))
1876 bounds.append ((s [1], SLICE_END , i))
1877 bounds = sorted (bounds)
1881 if val [1] == SLICE_START:
1884 if len (inside) > 1: # closing one that overlapped
1888 return [ slices [i] for i in ovrlp ]
1891 def mode_scan (secret, fname, outs=None, nacl=None):
1893 Dissect a binary file, looking for PDTCRYPT headers and objects.
1895 If *outs* is supplied, recoverable data will be dumped into the specified
1899 ifd = os.open (fname, os.O_RDONLY)
1900 except FileNotFoundError:
1901 noise ("PDT: failed to open %s readonly" % fname)
1906 if PDTCRYPT_VERBOSE is True:
1907 noise ("PDT: scan for potential sync points")
1908 cands = locate_hdr_candidates (ifd)
1909 if len (cands) == 0:
1910 noise ("PDT: scan complete: input does not contain potential PDT "
1911 "headers; giving up.")
1913 if PDTCRYPT_VERBOSE is True:
1914 noise ("PDT: scan complete: found %d candidates:" % len (cands))
1915 noise_output_candidates (cands)
1920 junk, todo, slices = [], [], []
1925 vdt, hdr = inspect_hdr (ifd, cand)
1927 vdts = verdict_fmt (vdt)
1929 if vdt == HDR_CAND_JUNK:
1930 noise ("PDT: obj %d: %s object: bad header, skipping" % vdts)
1933 off0 = cand + PDTCRYPT_HDR_SIZE
1934 if PDTCRYPT_VERBOSE is True:
1935 noise ("PDT: obj %d: read payload @%d" % (nobj, off0))
1936 pretty = hdr_fmt_pretty (hdr)
1937 noise (reduce (lambda a, e: (a + "\n" if a else "") + "PDT:\t· " + e,
1938 pretty.splitlines (), ""))
1941 if outs is not None:
1942 ofname = PDTCRYPT_RESCUENAME % nobj
1943 ofd = open2_dump_file (ofname, outs, force=PDTCRYPT_OVERWRITE)
1945 ctsize = hdr ["ctsize"]
1947 l = try_decrypt (ifd, off0, hdr, secret, ofd=ofd)
1949 slices.append ((off0, off0 + l))
1953 if vdt == HDR_CAND_GOOD and ok is True:
1954 noise ("PDT: %d → ✓ %s object %d–%d"
1955 % (cand, vdts, off0, off0 + ctsize))
1956 elif vdt == HDR_CAND_FISHY and ok is True:
1957 noise ("PDT: %d → × %s object %d–%d, corrupt header"
1958 % (cand, vdts, off0, off0 + ctsize))
1959 elif vdt == HDR_CAND_GOOD and ok is False:
1960 noise ("PDT: %d → × %s object %d–%d, problematic payload"
1961 % (cand, vdts, off0, off0 + ctsize))
1962 elif vdt == HDR_CAND_FISHY and ok is False:
1963 noise ("PDT: %d → × %s object %d–%d, corrupt header, problematic "
1964 "ciphertext" % (cand, vdts, off0, off0 + ctsize))
1971 noise ("PDT: all headers ok")
1973 noise ("PDT: %d candidates not parseable as headers:" % len (junk))
1974 noise_output_candidates (junk)
1976 overlap = find_overlaps (slices)
1977 if len (overlap) > 0:
1978 noise ("PDT: %d objects overlapping others" % len (overlap))
1979 for slice in overlap:
1980 noise ("PDT: × %d→%d" % (slice [0], slice [1]))
1982 def usage (err=False):
1986 indent = ' ' * len (SELF)
1987 out ("usage: %s SUBCOMMAND { --help" % SELF)
1988 out (" %s | [ -v ] { -p PASSWORD | -k KEY }" % indent)
1989 out (" %s [ { -i | --in } { - | SOURCE } ]" % indent)
1990 out (" %s [ { -n | --nacl } { SALT } ]" % indent)
1991 out (" %s [ { -o | --out } { - | DESTINATION } ]" % indent)
1992 out (" %s [ -D | --no-decrypt ] [ -S | --split ]" % indent)
1993 out (" %s [ -f | --format ]" % indent)
1996 out ("\t\tSUBCOMMAND main mode: { process | scrypt }")
1998 out ("\t\t process: extract objects from PDT archive")
1999 out ("\t\t scrypt: calculate hash from password and first object")
2000 out ("\t\t-p PASSWORD password to derive the encryption key from")
2001 out ("\t\t-k KEY encryption key as 16 bytes in hexadecimal notation")
2002 out ("\t\t-s enforce strict handling of initialization vectors")
2003 out ("\t\t-i SOURCE file name to read from")
2004 out ("\t\t-o DESTINATION file to write output to")
2005 out ("\t\t-n SALT provide salt for scrypt mode in hex encoding")
2006 out ("\t\t-v print extra info")
2007 out ("\t\t-S split into files at object boundaries; this")
2008 out ("\t\t requires DESTINATION to refer to directory")
2009 out ("\t\t-D PDT header and ciphertext passthrough")
2010 out ("\t\t-f format of SCRYPT hash output (“default” or “parameters”)")
2012 out ("\tinstead of filenames, “-” may used to specify stdin / stdout")
2014 sys.exit ((err is True) and 42 or 0)
2024 def parse_argv (argv):
2025 global PDTCRYPT_OVERWRITE
2027 mode = PDTCRYPT_DECRYPT
2033 scrypt_format = PDTCRYPT_SCRYPT_DEFAULT
2036 SELF = os.path.basename (next (argvi))
2039 rawsubcmd = next (argvi)
2040 subcommand = PDTCRYPT_SUB [rawsubcmd]
2041 except StopIteration:
2042 bail ("ERROR: subcommand required")
2044 bail ("ERROR: invalid subcommand “%s” specified" % rawsubcmd)
2050 except StopIteration:
2051 bail ("ERROR: argument list incomplete")
2053 def checked_secret (s):
2058 bail ("ERROR: encountered “%s” but secret already given" % arg)
2061 if arg in [ "-h", "--help" ]:
2064 elif arg in [ "-v", "--verbose", "--wtf" ]:
2065 global PDTCRYPT_VERBOSE
2066 PDTCRYPT_VERBOSE = True
2067 elif arg in [ "-i", "--in", "--source" ]:
2068 insspec = checked_arg ()
2069 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypt from %s" % insspec)
2070 elif arg in [ "-p", "--password" ]:
2071 arg = checked_arg ()
2072 checked_secret (make_secret (password=arg))
2073 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypting with password")
2075 if subcommand == PDTCRYPT_SUB_PROCESS:
2076 if arg in [ "-s", "--strict-ivs" ]:
2077 global PDTCRYPT_STRICTIVS
2078 PDTCRYPT_STRICTIVS = True
2079 elif arg in [ "-o", "--out", "--dest", "--sink" ]:
2080 outsspec = checked_arg ()
2081 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypt to %s" % outsspec)
2082 elif arg in [ "-f", "--force" ]:
2083 PDTCRYPT_OVERWRITE = True
2084 if PDTCRYPT_VERBOSE is True: noise ("PDT: overwrite existing files")
2085 elif arg in [ "-S", "--split" ]:
2086 mode |= PDTCRYPT_SPLIT
2087 if PDTCRYPT_VERBOSE is True: noise ("PDT: split files")
2088 elif arg in [ "-D", "--no-decrypt" ]:
2089 mode &= ~PDTCRYPT_DECRYPT
2090 if PDTCRYPT_VERBOSE is True: noise ("PDT: not decrypting")
2091 elif arg in [ "-k", "--key" ]:
2092 arg = checked_arg ()
2093 checked_secret (make_secret (key=arg))
2094 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypting with key")
2096 bail ("ERROR: unexpected positional argument “%s”" % arg)
2097 elif subcommand == PDTCRYPT_SUB_SCRYPT:
2098 if arg in [ "-n", "--nacl", "--salt" ]:
2099 nacl = checked_arg ()
2100 if PDTCRYPT_VERBOSE is True: noise ("PDT: salt key with %s" % nacl)
2101 elif arg in [ "-f", "--format" ]:
2102 arg = checked_arg ()
2104 scrypt_format = PDTCRYPT_SCRYPT_FORMAT [arg]
2106 bail ("ERROR: invalid scrypt output format %s" % arg)
2107 if PDTCRYPT_VERBOSE is True:
2108 noise ("PDT: scrypt output format “%s”" % scrypt_format)
2110 bail ("ERROR: unexpected positional argument “%s”" % arg)
2111 elif subcommand == PDTCRYPT_SUB_SCAN:
2112 if arg in [ "-o", "--out", "--dest", "--sink" ]:
2113 outsspec = checked_arg ()
2114 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypt to %s" % outsspec)
2115 elif arg in [ "-f", "--force" ]:
2116 PDTCRYPT_OVERWRITE = True
2117 if PDTCRYPT_VERBOSE is True: noise ("PDT: overwrite existing files")
2119 bail ("ERROR: unexpected positional argument “%s”" % arg)
2122 if PDTCRYPT_VERBOSE is True:
2123 noise ("ERROR: no password or key specified, trying $PDTCRYPT_PASSWORD")
2124 epw = os.getenv ("PDTCRYPT_PASSWORD")
2126 checked_secret (make_secret (password=epw.strip ()))
2129 if PDTCRYPT_VERBOSE is True:
2130 noise ("ERROR: no password or key specified, trying $PDTCRYPT_KEY")
2131 ek = os.getenv ("PDTCRYPT_KEY")
2133 checked_secret (make_secret (key=ek.strip ()))
2136 if subcommand == PDTCRYPT_SUB_SCRYPT:
2137 bail ("ERROR: scrypt hash mode requested but no password given")
2138 elif mode & PDTCRYPT_DECRYPT:
2139 bail ("ERROR: decryption requested but no password given")
2141 if mode & PDTCRYPT_SPLIT and outsspec is None:
2142 bail ("ERROR: split mode is incompatible with stdout sink "
2145 if subcommand == PDTCRYPT_SUB_SCAN and outsspec is None:
2146 pass # no output by default in scan mode
2147 elif mode & PDTCRYPT_SPLIT or subcommand == PDTCRYPT_SUB_SCAN:
2148 # destination must be directory
2150 bail ("ERROR: mode is incompatible with stdout sink")
2153 os.makedirs (outsspec, 0o700)
2154 except FileExistsError:
2155 # if it’s a directory with appropriate perms, everything is
2156 # good; otherwise, below invocation of open(2) will fail
2158 outs = os.open (outsspec, os.O_DIRECTORY, 0o600)
2159 except FileNotFoundError as exn:
2160 bail ("ERROR: cannot create target directory “%s”" % outsspec)
2161 except NotADirectoryError as exn:
2162 bail ("ERROR: target path “%s” is not a directory" % outsspec)
2164 outs = deptdcrypt_mk_stream (PDTCRYPT_SINK, outsspec or "-")
2166 if subcommand == PDTCRYPT_SUB_SCAN:
2168 bail ("ERROR: please supply an input file for scanning")
2170 bail ("ERROR: input must be seekable; please specify a file")
2171 return True, partial (mode_scan, secret, insspec, outs, nacl=nacl)
2173 if subcommand == PDTCRYPT_SUB_SCRYPT:
2174 if secret [0] == PDTCRYPT_SECRET_KEY:
2175 bail ("ERROR: scrypt mode requires a password")
2176 if insspec is not None and nacl is not None \
2177 or insspec is None and nacl is None :
2178 bail ("ERROR: please supply either an input file or "
2183 if insspec is not None or subcommand != PDTCRYPT_SUB_SCRYPT:
2184 ins = deptdcrypt_mk_stream (PDTCRYPT_SOURCE, insspec or "-")
2186 if subcommand == PDTCRYPT_SUB_SCRYPT:
2187 return True, partial (mode_scrypt, secret [1].encode (), ins, nacl,
2190 return True, partial (mode_depdtcrypt, mode, secret, ins, outs)
2194 ok, runner = parse_argv (argv)
2196 if ok is True: return runner ()
2201 if __name__ == "__main__":
2202 sys.exit (main (sys.argv))