6 ===============================================================================
7 crypto -- Encryption Layer for the Deltatar Backup
8 ===============================================================================
12 - AES-GCM for the symmetric encryption;
17 - NIST Recommendation for Block Cipher Modes of Operation: Galois/Counter
19 http://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf
22 https://cryptome.org/2014/01/aes-gcm-v1.pdf
24 - Authentication weaknesses in GCM
25 http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/comments/CWC-GCM/Ferguson2.pdf
28 -------------------------------------------------------------------------------
30 Errors fall into roughly three categories:
32 - Cryptographical errors or invalid data.
34 - ``InvalidGCMTag`` (decryption failed on account of an invalid GCM
36 - ``InvalidIVFixedPart`` (IV fixed part of object not found in list),
37 - ``DuplicateIV`` (the IV of an encrypted object already occurred),
38 - ``DecryptionError`` (used in CLI decryption for presenting error
39 conditions to the user).
41 - Incorrect usage of the library.
43 - ``InvalidParameter`` (non-conforming user supplied parameter),
44 - ``InvalidHeader`` (data passed for reading not parsable into header),
45 - ``FormatError`` (cannot handle header or parameter version),
48 - Bad internal state. If one of these is encountered it means that a state
49 was reached that shouldn’t occur during normal processing.
54 Also, ``EndOfFile`` is used as a sentinel to communicate that a stream supplied
55 for reading is exhausted.
57 Initialization Vectors
58 -------------------------------------------------------------------------------
60 Initialization vectors are checked for reuse during the lifetime of a decryptor.
61 The fixed counters for metadata files cannot be reused and attempts to do so
62 will cause a DuplicateIV error. This means the length of objects encrypted with
63 a metadata counter is capped at 63 GB.
65 For ordinary, non-metadata payload, there is an optional mode with strict IV
66 checking that causes a crypto context to fail if an IV encountered or created
67 was already used for decrypting or encrypting, respectively, an earlier object.
68 Note that this mode can trigger false positives when decrypting non-linearly,
69 e. g. when traversing the same object multiple times. Since the crypto context
70 has no notion of a position in a PDT encrypted archive, this condition must be
71 sorted out downstream.
74 -------------------------------------------------------------------------------
76 ``crypto.py`` may be invoked as a script for decrypting, validating, and
77 splitting PDT encrypted files. Consult the usage message for details.
81 Decrypt from stdin using the password ‘foo’: ::
83 $ crypto.py process foo -i - -o - <some-file.tar.gz.pdtcrypt >some-file.tar.gz
85 Output verbose information about the encrypted objects in the archive: ::
87 $ crypto.py process foo -v -i some-file.tar.gz.pdtcrypt -o /dev/null
88 PDT: decrypt from some-file.tar.gz.pdtcrypt
89 PDT: decrypt to /dev/null
90 PDT: source: file some-file.tar.gz.pdtcrypt
91 PDT: sink: file /dev/null
93 PDT: · version = 1 : 0100
94 PDT: · paramversion = 1 : 0100
95 PDT: · nacl : d270 b031 00d1 87e2 c946 610d 7b7f 7e5f
96 PDT: · iv : 02ee 3dd7 a963 1eb1 0100 0000
97 PDT: · ctsize = 591 : 4f02 0000 0000 0000
98 PDT: · tag : 5b2d 6d8b 8f82 4842 12fd 0b10 b6e3 369b
99 PDT: 64 decrypt obj no. 1, 591 B
100 PDT: · [64] 0% done, read block (591 B of 591 B remaining)
101 PDT: · decrypt ciphertext 591 B
102 PDT: · decrypt plaintext 591 B
106 Also, the mode *scrypt* allows deriving encryption keys. To calculate the
107 encryption key from the password ‘foo’ and the salt of the first object in a
108 PDT encrypted file: ::
110 $ crypto.py scrypt foo -i some-file.pdtcrypt
111 {"paramversion": 1, "salt": "Cqzbk48e3peEjzWto8D0yA==", "key": "JH9EkMwaM4x9F5aim5gK/Q=="}
113 The computed 16 byte key is given in base64 encoding in the value to
114 ``key`` and can be fed into Python’s ``base64.b64decode()`` to obtain the
115 corresponding binary representation.
117 Note that in Scrypt hashing mode, no data integrity checks are being performed.
118 If the wrong password is given, a wrong key will be derived. Whether the password
119 was indeed correct can only be determined by decrypting. Note that since PDT
120 archives essentially consist of a stream of independent objects, the salt and
121 other parameters may change. Thus a key derived using above method from the
122 first object doesn’t necessarily apply to any of the subsequent objects.
131 from functools import reduce, partial
142 except ImportError as exn:
145 if __name__ == "__main__": ## Work around the import mechanism lest Python’s
146 pwd = os.getcwd() ## preference for local imports causes a cyclical
147 ## import (crypto → pylibscrypt → […] → ./tarfile → crypto).
148 sys.path = [ p for p in sys.path if p.find ("deltatar") < 0 ]
151 from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
152 from cryptography.hazmat.backends import default_backend
156 __all__ = [ "hdr_make", "hdr_read", "hdr_fmt", "hdr_fmt_pretty"
158 , "PDTCRYPT_HDR_SIZE", "AES_GCM_IV_CNT_DATA"
159 , "AES_GCM_IV_CNT_INFOFILE", "AES_GCM_IV_CNT_INDEX"
163 ###############################################################################
165 ###############################################################################
167 class EndOfFile (Exception):
171 def __init__ (self, n=None, msg=None):
177 class InvalidParameter (Exception):
178 """Inputs not valid for PDT encryption."""
182 class InvalidHeader (Exception):
183 """Header not valid."""
187 class InvalidGCMTag (Exception):
189 The GCM tag calculated during decryption differs from that in the object
195 class InvalidIVFixedPart (Exception):
197 IV fixed part not in supplied list: either the backup is corrupt or the
198 current object does not belong to it.
203 class IVFixedPartError (Exception):
205 Error creating a unique IV fixed part: repeated calls to system RNG yielded
206 the same sequence of bytes as the last IV used.
211 class InvalidFileCounter (Exception):
213 When encrypting, an attempted reuse of a dedicated counter (info file,
214 index file) was caught.
219 class DuplicateIV (Exception):
221 During encryption, the current IV fixed part is identical to an already
222 existing IV (same prefix and file counter). This indicates tampering or
223 programmer error and cannot be recovered from.
228 class NonConsecutiveIV (Exception):
230 IVs not numbered consecutively. This is a hard error with strict IV
231 checking. Precludes random access to the encrypted objects.
236 class FormatError (Exception):
237 """Unusable parameters in header."""
241 class DecryptionError (Exception):
242 """Error during decryption with ``crypto.py`` on the command line."""
246 class Unreachable (Exception):
248 Makeshift __builtin_unreachable(); always a programmer error if
254 class InternalError (Exception):
255 """Errors not ascribable to bad user inputs or cryptography."""
259 ###############################################################################
260 ## crypto layer version
261 ###############################################################################
263 ENCRYPTION_PARAMETERS = \
265 { "kdf": ("dummy", 16)
266 , "enc": "passthrough" }
274 , "enc": "aes-gcm" } }
276 ###############################################################################
278 ###############################################################################
280 PDTCRYPT_HDR_MAGIC = b"PDTCRYPT"
282 PDTCRYPT_HDR_SIZE_MAGIC = 8 # 8
283 PDTCRYPT_HDR_SIZE_VERSION = 2 # 10
284 PDTCRYPT_HDR_SIZE_PARAMVERSION = 2 # 12
285 PDTCRYPT_HDR_SIZE_NACL = 16 # 28
286 PDTCRYPT_HDR_SIZE_IV = 12 # 40
287 PDTCRYPT_HDR_SIZE_CTSIZE = 8 # 48
288 PDTCRYPT_HDR_SIZE_TAG = 16 # 64 GCM auth tag
290 PDTCRYPT_HDR_SIZE = PDTCRYPT_HDR_SIZE_MAGIC + PDTCRYPT_HDR_SIZE_VERSION \
291 + PDTCRYPT_HDR_SIZE_PARAMVERSION + PDTCRYPT_HDR_SIZE_NACL \
292 + PDTCRYPT_HDR_SIZE_IV + PDTCRYPT_HDR_SIZE_CTSIZE \
293 + PDTCRYPT_HDR_SIZE_TAG # = 64
295 # precalculate offsets since Python can’t do constant folding over names
296 HDR_OFF_VERSION = PDTCRYPT_HDR_SIZE_MAGIC
297 HDR_OFF_PARAMVERSION = HDR_OFF_VERSION + PDTCRYPT_HDR_SIZE_VERSION
298 HDR_OFF_NACL = HDR_OFF_PARAMVERSION + PDTCRYPT_HDR_SIZE_PARAMVERSION
299 HDR_OFF_IV = HDR_OFF_NACL + PDTCRYPT_HDR_SIZE_NACL
300 HDR_OFF_CTSIZE = HDR_OFF_IV + PDTCRYPT_HDR_SIZE_IV
301 HDR_OFF_TAG = HDR_OFF_CTSIZE + PDTCRYPT_HDR_SIZE_CTSIZE
305 FMT_I2N_IV = "<8sL" # 8 random bytes ‖ 32 bit counter
306 FMT_I2N_HDR = ("<" # little endian, standard sizes, no padding
310 "16s" # sodium chloride
316 AES_KEY_SIZE = 16 # b"0123456789abcdef"
317 AES_KEY_SIZE_B64 = 24 # b'MDEyMzQ1Njc4OWFiY2RlZg=='
318 AES_GCM_MAX_SIZE = (1 << 36) - (1 << 5) # 2^39 - 2^8 b ≅ 64 GB
319 PDTCRYPT_MAX_OBJ_SIZE_DEFAULT = 63 * (1 << 30) # 63 GB
320 PDTCRYPT_MAX_OBJ_SIZE = PDTCRYPT_MAX_OBJ_SIZE_DEFAULT
322 # index and info files are written on the fly while encrypting, so their
323 # counters must be available in advance
324 AES_GCM_IV_CNT_INFOFILE = 1 # constant
325 AES_GCM_IV_CNT_INDEX = AES_GCM_IV_CNT_INFOFILE + 1
326 AES_GCM_IV_CNT_DATA = AES_GCM_IV_CNT_INDEX + 1 # also for multivolume
327 AES_GCM_IV_CNT_MAX_DEFAULT = 0xffFFffFF
328 AES_GCM_IV_CNT_MAX = AES_GCM_IV_CNT_MAX_DEFAULT
330 # IV structure and generation
331 PDTCRYPT_IV_GEN_MAX_RETRIES = 10 # ×
332 PDTCRYPT_IV_FIXEDPART_SIZE = 8 # B
333 PDTCRYPT_IV_COUNTER_SIZE = 4 # B
335 # secret type: PW of string | KEY of char [16]
336 PDTCRYPT_SECRET_PW = 0
337 PDTCRYPT_SECRET_KEY = 1
339 ###############################################################################
341 ###############################################################################
347 # , paramversion : u16
353 # fn hdr_read (f : handle) -> hdrinfo;
354 # fn hdr_make (f : handle, h : hdrinfo) -> IOResult<usize>;
355 # fn hdr_fmt (h : hdrinfo) -> String;
360 Read bytes as header structure.
362 If the input could not be interpreted as a header, fail with
367 mag, version, paramversion, nacl, iv, ctsize, tag = \
368 struct.unpack (FMT_I2N_HDR, data)
369 except Exception as exn:
370 raise InvalidHeader ("error unpacking header from [%r]: %s"
371 % (binascii.hexlify (data), str (exn)))
373 if mag != PDTCRYPT_HDR_MAGIC:
374 raise InvalidHeader ("bad magic in header: expected [%s], got [%s]"
375 % (PDTCRYPT_HDR_MAGIC, mag))
378 { "version" : version
379 , "paramversion" : paramversion
387 def hdr_read_stream (instr):
389 Read header from stream at the current position.
391 Fail with ``InvalidHeader`` if insufficient bytes were read from the
392 stream, or if the content could not be interpreted as a header.
394 data = instr.read(PDTCRYPT_HDR_SIZE)
398 elif ldata != PDTCRYPT_HDR_SIZE:
399 raise InvalidHeader ("hdr_read_stream: expected %d B, received %d B"
400 % (PDTCRYPT_HDR_SIZE, ldata))
401 return hdr_read (data)
404 def hdr_from_params (version, paramversion, nacl, iv, ctsize, tag):
406 Assemble the necessary values into a PDTCRYPT header.
408 :type version: int to fit uint16_t
409 :type paramversion: int to fit uint16_t
410 :type nacl: bytes to fit uint8_t[16]
411 :type iv: bytes to fit uint8_t[12]
412 :type size: int to fit uint64_t
413 :type tag: bytes to fit uint8_t[16]
415 buf = bytearray (PDTCRYPT_HDR_SIZE)
416 bufv = memoryview (buf)
419 struct.pack_into (FMT_I2N_HDR, bufv, 0,
421 version, paramversion, nacl, iv, ctsize, tag)
422 except Exception as exn:
423 return False, "error assembling header: %s" % str (exn)
425 return True, bytes (buf)
def hdr_make_dummy (s):
    """
    Build a placeholder block of ``PDTCRYPT_HDR_SIZE`` bytes that all carry
    the same value. That value is derived from the string *s* by summing the
    code points of its characters and reducing the sum modulo 255.

    The placeholder occupies the position of an object header until the real
    header can be written; finding it again confirms that the jump back to
    the header position landed at the right offset.

    :type  s: str
    :rtype:   bytes
    """
    fill = sum (map (ord, s)) % 0xFF
    return bytes ([fill]) * PDTCRYPT_HDR_SIZE
440 Assemble a header from the given header structure.
442 return hdr_from_params (version=hdr.get("version"),
443 paramversion=hdr.get("paramversion"),
444 nacl=hdr.get("nacl"), iv=hdr.get("iv"),
445 ctsize=hdr.get("ctsize"), tag=hdr.get("tag"))
448 HDR_FMT = "I2n_header { version: %d, paramversion: %d, nacl: %s[%d]," \
449 " iv: %s[%d], ctsize: %d, tag: %s[%d] }"
452 """Format a header structure into readable output."""
453 return HDR_FMT % (h["version"], h["paramversion"],
454 binascii.hexlify (h["nacl"]), len(h["nacl"]),
455 binascii.hexlify (h["iv"]), len(h["iv"]),
457 binascii.hexlify (h["tag"]), len(h["tag"]))
def hex_spaced_of_bytes (b):
    """
    Format a bytes-like object hexdump style: two lower case hex digits per
    byte, grouped into blocks of two bytes that are separated by a single
    space. An odd trailing byte forms a final block of its own.

    No separator is emitted before the first block, so a single byte is
    rendered as ``"ab"`` and empty input as the empty string.

    :param b: bytes-like object (anything that iterates as ints 0‥255)
    :rtype:   str
    """
    digits = "".join ("%.2x" % c for c in b)
    # four hex digits correspond to one group of two bytes
    return " ".join (digits [i : i + 4] for i in range (0, len (digits), 4))
467 def hdr_iv_counter (h):
468 """Extract the variable part of the IV of the given header."""
469 _fixed, cnt = struct.unpack (FMT_I2N_IV, h ["iv"])
473 def hdr_iv_fixed (h):
474 """Extract the fixed part of the IV of the given header."""
475 fixed, _cnt = struct.unpack (FMT_I2N_IV, h ["iv"])
479 hdr_dump = hex_spaced_of_bytes
483 """version = %-4d : %s
484 paramversion = %-4d : %s
491 def hdr_fmt_pretty (h):
493 Format header structure into multi-line representation of its contents and
494 their raw representation. (Omit the implicit “PDTCRYPT” magic bytes that
495 precede every header.)
497 return HDR_FMT_PRETTY \
499 hex_spaced_of_bytes (struct.pack (FMT_UINT16_LE, h["version"])),
501 hex_spaced_of_bytes (struct.pack (FMT_UINT16_LE, h["paramversion"])),
502 hex_spaced_of_bytes (h["nacl"]),
503 hex_spaced_of_bytes (h["iv"]),
505 hex_spaced_of_bytes (struct.pack (FMT_UINT64_LE, h["ctsize"])),
506 hex_spaced_of_bytes (h["tag"]))
508 IV_FMT = "((f %s) (c %d))"
511 """Format the two components of an IV in a readable fashion."""
512 fixed, cnt = struct.unpack (FMT_I2N_IV, iv)
513 return IV_FMT % (binascii.hexlify (fixed), cnt)
516 ###############################################################################
518 ###############################################################################
520 class Location (object):
524 def restore_loc_fmt (loc):
526 % (loc.n, loc.offset)
528 def locate_hdr_candidates (fd):
530 Walk over instances of the magic string in the payload, collecting their
531 positions. If the offset of the first found instance is not zero, the file
532 begins with leading garbage. Used by disaster recovery.
534 :return: The list of offsets in the file.
538 mm = mmap.mmap(fd, 0, mmap.MAP_SHARED, mmap.PROT_READ)
541 pos = mm.find (PDTCRYPT_HDR_MAGIC, pos)
550 HDR_CAND_GOOD = 0 # header marks begin of valid object
551 HDR_CAND_FISHY = 1 # inconclusive (tag mismatch, obj overlap etc.)
552 HDR_CAND_JUNK = 2 # not a header / object unreadable
555 { HDR_CAND_GOOD : "valid"
556 , HDR_CAND_FISHY : "fishy"
557 , HDR_CAND_JUNK : "junk"
def verdict_fmt (vdt):
    """
    Translate a header verdict (one of the ``HDR_CAND_*`` values) into the
    name registered for it in ``HDR_VERDICT_NAME``.
    """
    name = HDR_VERDICT_NAME [vdt]
    return name
565 def inspect_hdr (fd, off):
567 Attempt to parse a header in *fd* at position *off*.
569 Returns a verdict about the quality of that header plus the parsed header
573 _ = os.lseek (fd, off, os.SEEK_SET)
575 if os.lseek (fd, 0, os.SEEK_CUR) != off:
576 if PDTCRYPT_VERBOSE is True:
577 noise ("PDT: %d → dismissed (lseek() past EOF)" % off)
578 return HDR_CAND_JUNK, None
580 raw = os.read (fd, PDTCRYPT_HDR_SIZE)
581 if len (raw) != PDTCRYPT_HDR_SIZE:
582 if PDTCRYPT_VERBOSE is True:
583 noise ("PDT: %d → dismissed (EOF inside header)" % off)
584 return HDR_CAND_JUNK, None
588 except InvalidHeader as exn:
589 if PDTCRYPT_VERBOSE is True:
590 noise ("PDT: %d → dismissed (invalid: [%s])" % (off, str (exn)))
591 return HDR_CAND_JUNK, None
593 obj0 = off + PDTCRYPT_HDR_SIZE
594 objX = obj0 + hdr ["ctsize"]
596 eof = os.lseek (fd, 0, os.SEEK_END)
598 if PDTCRYPT_VERBOSE is True:
599 noise ("PDT: %d → EOF inside object (%d≤%d≤%d); adjusting size to "
600 "%d" % (off, obj0, eof, objX, (eof - obj0)))
601 # try reading up to the end
602 hdr ["ctsize"] = eof - obj0
603 return HDR_CAND_FISHY, hdr
605 return HDR_CAND_GOOD, hdr
608 def try_decrypt (ifd, off, hdr, secret, ofd=-1):
610 Attempt to decrypt the object in the (seekable) descriptor *ifd* starting
611 at *off* using the metadata in *hdr* and *secret*. An output fd can be
612 specified with *ofd*; if it is *-1* – the default –, the decrypted payload
615 Always creates a fresh decryptor, so validation steps across objects don’t
618 Errors during GCM tag validation are ignored. Used by disaster recovery.
620 ctleft = hdr ["ctsize"]
624 if ks == PDTCRYPT_SECRET_PW:
625 decr = Decrypt (password=secret [1])
626 elif ks == PDTCRYPT_SECRET_KEY:
628 decr = Decrypt (key=key)
635 os.lseek (ifd, pos, os.SEEK_SET)
638 cnksiz = min (ctleft, PDTCRYPT_BLOCKSIZE)
639 cnk = os.read (ifd, cnksiz)
642 pt = decr.process (cnk)
647 except InvalidGCMTag:
648 noise ("PDT: GCM tag mismatch for object %d–%d"
649 % (off, off + hdr ["ctsize"]))
650 if len (pt) > 0 and ofd != -1:
653 except Exception as exn:
654 noise ("PDT: error decrypting object %d–%d@%d, %d B remaining [%s]"
655 % (off, off + hdr ["ctsize"], pos, ctleft, exn))
661 def readable_objects_offsets (ifd, secret, cands):
663 From a list of candidates, locate the ones that mark the start of actual
664 readable PDTCRYPT objects.
668 for i, cand in enumerate (cands):
669 vdt, hdr = inspect_hdr (ifd, cand)
670 if vdt == HDR_CAND_JUNK:
671 pass # ignore unreadable ones
672 elif vdt in [HDR_CAND_GOOD, HDR_CAND_FISHY]:
673 ctsize = hdr ["ctsize"]
674 off0 = cand + PDTCRYPT_HDR_SIZE
675 ok = try_decrypt (ifd, off0, hdr, secret) == ctsize
677 good.append ((cand, off0 + ctsize))
679 overlap = find_overlaps (good)
681 return [ g [0] for g in good ]
684 def reconstruct_offsets (fname, secret):
685 ifd = os.open (fname, os.O_RDONLY)
688 cands = locate_hdr_candidates (ifd)
689 return readable_objects_offsets (ifd, secret, cands)
694 ###############################################################################
696 ###############################################################################
698 def make_secret (password=None, key=None):
700 Safely create a “secret” value that consists either of a key or a password.
701 Inputs are validated: the password is accepted as (UTF-8 encoded) bytes or
702 string; for the key only a bytes object of the proper size or a base64
703 encoded string thereof is accepted.
705 If both are provided, the key is preferred over the password; no checks are
706 performed whether the key is derived from the password.
708 :returns: secret value if inputs were acceptable | None otherwise.
711 if isinstance (key, str) is True:
712 key = key.encode ("utf-8")
713 if isinstance (key, bytes) is True:
714 if len (key) == AES_KEY_SIZE:
715 return (PDTCRYPT_SECRET_KEY, key)
716 if len (key) == AES_KEY_SIZE * 2:
718 key = binascii.unhexlify (key)
719 return (PDTCRYPT_SECRET_KEY, key)
720 except binascii.Error: # garbage in string
722 if len (key) == AES_KEY_SIZE_B64:
724 key = base64.b64decode (key)
725 # the base64 processor is very tolerant and allows for
726 # arbitrary trailing and leading data thus the data obtained
727 # must be checked for the proper length
728 if len (key) == AES_KEY_SIZE:
729 return (PDTCRYPT_SECRET_KEY, key)
730 except binascii.Error: # “incorrect padding”
732 elif password is not None:
733 if isinstance (password, str) is True:
734 return (PDTCRYPT_SECRET_PW, password)
735 elif isinstance (password, bytes) is True:
737 password = password.decode ("utf-8")
738 return (PDTCRYPT_SECRET_PW, password)
739 except UnicodeDecodeError:
745 ###############################################################################
746 ## passthrough / null encryption
747 ###############################################################################
class PassthroughCipher (object):
    """
    Null cipher used for the ``"passthrough"`` encryption setting. It offers
    the ``update`` / ``finalize`` / ``finalize_with_tag`` methods and the
    ``tag`` attribute that the crypto contexts use on a cipher object, but
    leaves the data untouched.
    """

    # stand-in authentication tag: sixteen zero bytes
    tag = b"\x00" * 16

    def __init__ (self):
        pass

    def update (self, b):
        """Return the input buffer unchanged."""
        return b

    def finalize (self):
        """There is no buffered data to flush; yield an empty result."""
        return b""

    def finalize_with_tag (self, _):
        """Ignore the tag passed in; nothing is verified or flushed."""
        return b""
761 ###############################################################################
762 ## convenience wrapper
763 ###############################################################################
def kdf_dummy (klen, password, _nacl):
    """
    Fake KDF for testing purposes, selected for the ``"dummy"`` key
    derivation method. The “key” is simply the password repeated and
    truncated to exactly *klen* bytes; no actual hashing takes place.

    :param klen:     requested key length in bytes
    :param password: str (encoded with the default codec first) or bytes
    :param _nacl:    ignored; present for signature compatibility with the
                     other KDF handlers
    :returns:        pair of key (*klen* bytes) and an empty salt
    :raises ZeroDivisionError: if the password is empty
    """
    if isinstance (password, bytes) is False:
        password = password.encode ()
    # Measure the password only after encoding: a str holding non-ASCII
    # characters is longer in bytes than in characters, and the repeat count
    # must be based on the byte length to arrive at exactly klen bytes.
    q, r = divmod (klen, len (password))
    return password * q + password [:r], b""
777 SCRYPT_KEY_MEMO = { } # static because needed for both the info file and the archive
780 def kdf_scrypt (params, password, nacl):
782 Wrapper for the Scrypt KDF, corresponds to parameter version one. The
783 computation result is memoized based on the inputs to facilitate spawning
784 multiple encryption contexts.
789 dkLen = params["dkLen"]
792 nacl = os.urandom (params["NaCl_LEN"])
794 key_parms = (password, nacl, N, r, p, dkLen)
795 global SCRYPT_KEY_MEMO
796 if key_parms not in SCRYPT_KEY_MEMO:
797 SCRYPT_KEY_MEMO [key_parms] = \
798 pylibscrypt.scrypt (password, nacl, N, r, p, dkLen)
799 return SCRYPT_KEY_MEMO [key_parms], nacl
802 def kdf_by_version (paramversion=None, defs=None):
804 Pick the KDF handler corresponding to the parameter version or the
807 :rtype: function (password : str, nacl : str) -> str
809 if paramversion is not None:
810 defs = ENCRYPTION_PARAMETERS.get(paramversion, None)
812 raise InvalidParameter ("no encryption parameters for version %r"
814 (kdf, params) = defs["kdf"]
816 if kdf == "scrypt" : fn = kdf_scrypt
817 if kdf == "dummy" : fn = kdf_dummy
819 raise ValueError ("key derivation method %r unknown" % kdf)
820 return partial (fn, params)
823 ###############################################################################
825 ###############################################################################
827 def scrypt_hashsource (pw, ins):
829 Calculate the SCRYPT hash from the password and the information contained
830 in the first header found in ``ins``.
832 This does not validate whether the first object is encrypted correctly.
834 if isinstance (pw, str) is True:
836 elif isinstance (pw, bytes) is False:
837 raise InvalidParameter ("password must be a string, not %s"
839 if isinstance (ins, io.BufferedReader) is False and \
840 isinstance (ins, io.FileIO) is False:
841 raise InvalidParameter ("file to hash must be opened in “binary” mode")
844 hdr = hdr_read_stream (ins)
845 except EndOfFile as exn:
846 noise ("PDT: malformed input: end of file reading first object header")
851 pver = hdr ["paramversion"]
852 if PDTCRYPT_VERBOSE is True:
853 noise ("PDT: salt of first object : %s" % binascii.hexlify (nacl))
854 noise ("PDT: parameter version of archive : %d" % pver)
857 defs = ENCRYPTION_PARAMETERS.get(pver, None)
858 kdfname, params = defs ["kdf"]
859 if kdfname != "scrypt":
860 noise ("PDT: input is not an SCRYPT archive")
863 kdf = kdf_by_version (None, defs)
864 except ValueError as exn:
865 noise ("PDT: object has unknown parameter version %d" % pver)
867 hsh, _void = kdf (pw, nacl)
869 return hsh, nacl, hdr ["version"], pver
872 def scrypt_hashfile (pw, fname):
874 Calculate the SCRYPT hash from the password and the information contained
875 in the first header found in the given file. The header is read only at
878 with deptdcrypt_mk_stream (PDTCRYPT_SOURCE, fname or "-") as ins:
879 hsh, _void, _void, _void = scrypt_hashsource (pw, ins)
883 ###############################################################################
885 ###############################################################################
887 class Crypto (object):
889 Encryption context to remain alive throughout an entire tarfile pass.
894 cnt = None # file counter (uint32_t != 0)
895 iv = None # current IV
896 fixed = None # accu for 64 bit fixed parts of IV
897 used_ivs = None # tracks IVs
898 strict_ivs = False # if True, panic on duplicate object IV
907 info_counter_used = False
908 index_counter_used = False
def __init__ (self, *al, **akv):
    """
    Create the per-instance IV bookkeeping, then pass all positional and
    keyword arguments on to ``.set_parameters()`` unchanged.
    """
    # replace the class level ``None`` placeholder with a set owned by this
    # instance; ``.check_duplicate_iv()`` relies on it
    self.used_ivs = set ()
    self.set_parameters (*al, **akv)
915 def next_fixed (self):
920 def set_object_counter (self, cnt=None):
922 Safely set the internal counter of encrypted objects. Numerous
925 The same counter may not be reused in combination with one IV fixed
926 part. This is validated elsewhere in the IV handling.
928 Counter zero is invalid. The first two counters are reserved for
929 metadata. The implementation does not allow for splitting metadata
930 files over multiple encrypted objects. (This would be possible by
931 assigning new fixed parts.) Thus in a Deltatar backup there is at most
932 one object with a counter value of one and two. On creation of a
933 context, the initial counter may be chosen. The globals
934 ``AES_GCM_IV_CNT_INFOFILE`` and ``AES_GCM_IV_CNT_INDEX`` can be used to
935 request one of the reserved values. If one of these values has been
936 used, any further attempt of setting the counter to that value will
937 be rejected with an ``InvalidFileCounter`` exception.
939 Out of bounds values (i. e. below one and more than the maximum of 2³²)
940 cause an ``InvalidParameter`` exception to be thrown.
943 self.cnt = AES_GCM_IV_CNT_DATA
945 if cnt == 0 or cnt > AES_GCM_IV_CNT_MAX + 1:
946 raise InvalidParameter ("invalid counter value %d requested: "
947 "acceptable values are from 1 to %d"
948 % (cnt, AES_GCM_IV_CNT_MAX))
949 if cnt == AES_GCM_IV_CNT_INFOFILE:
950 if self.info_counter_used is True:
951 raise InvalidFileCounter ("attempted to reuse info file "
952 "counter %d: must be unique" % cnt)
953 self.info_counter_used = True
954 elif cnt == AES_GCM_IV_CNT_INDEX:
955 if self.index_counter_used is True:
956 raise InvalidFileCounter ("attempted to reuse index file "
957 " counter %d: must be unique" % cnt)
958 self.index_counter_used = True
959 if cnt <= AES_GCM_IV_CNT_MAX:
962 # cnt == AES_GCM_IV_CNT_MAX + 1 → wrap
963 self.cnt = AES_GCM_IV_CNT_DATA
967 def set_parameters (self, password=None, key=None, paramversion=None,
968 nacl=None, counter=None, strict_ivs=False):
970 Configure the internal state of a crypto context. Not intended for
974 self.set_object_counter (counter)
975 self.strict_ivs = strict_ivs
977 if paramversion is not None:
978 self.paramversion = paramversion
981 self.key, self.nacl = key, nacl
984 if password is not None:
985 if isinstance (password, bytes) is False:
986 password = str.encode (password)
987 self.password = password
988 if paramversion is None and nacl is None:
989 # postpone key setup until first header is available
991 kdf = kdf_by_version (paramversion)
993 self.key, self.nacl = kdf (password, nacl)
996 def process (self, buf):
998 Encrypt / decrypt a buffer. Invokes the ``.update()`` method on the
999 wrapped encryptor or decryptor, respectively.
1001 The Cryptography exception ``AlreadyFinalized`` is translated to an
1002 ``InternalError`` at this point. It may occur in sound code when the GC
1003 closes an encrypting stream after an error. Everywhere else it must be
1006 if self.enc is None:
1007 raise RuntimeError ("process: context not initialized")
1008 self.stats ["in"] += len (buf)
1010 out = self.enc.update (buf)
1011 except cryptography.exceptions.AlreadyFinalized as exn:
1012 raise InternalError (exn)
1013 self.stats ["out"] += len (out)
1017 def next (self, password, paramversion, nacl, iv):
1019 Prepare for encrypting another object: Reset the data counters and
1020 change the configuration in case one of the variable parameters differs
1021 from the last object. Also check the IV for duplicates and error out
1022 if strict checking was requested.
1026 self.stats ["obj"] += 1
1028 self.check_duplicate_iv (iv)
1030 if ( self.paramversion != paramversion
1031 or self.password != password
1032 or self.nacl != nacl):
1033 self.set_parameters (password=password, paramversion=paramversion,
1034 nacl=nacl, strict_ivs=self.strict_ivs)
def check_duplicate_iv (self, iv):
    """
    Record an IV (the 12 byte representation as in the header) in the set of
    IVs seen by this context.

    With strict checking enabled, an IV that was recorded earlier raises
    ``DuplicateIV`` instead. Depending on the context, this may indicate a
    serious error (IV reuse). Without strict checking, repeated IVs are
    accepted silently.
    """
    if self.strict_ivs is True:
        if iv in self.used_ivs:
            raise DuplicateIV ("iv %s was reused" % iv_fmt (iv))
    # either the IV is new or duplicates are tolerated; remember it
    self.used_ivs.add (iv)
def counters (self):
    """
    Access the data counters.

    :returns: triple of the ``"obj"``, ``"in"`` and ``"out"`` entries of
              ``self.stats``, in that order.
    """
    stats = self.stats
    return stats ["obj"], stats ["in"], stats ["out"]
1058 Clear the current context regardless of its finalization state. The
1059 next operation must be ``.next()``.
1064 class Encrypt (Crypto):
1070 def __init__ (self, version, paramversion, password=None, key=None, nacl=None,
1071 counter=AES_GCM_IV_CNT_DATA, strict_ivs=True):
1073 The ctor will throw immediately if one of the parameters does not conform
1074 to our expectations.
1076 counter=AES_GCM_IV_CNT_DATA, strict_ivs=True):
1077 :type version: int to fit uint16_t
1078 :type paramversion: int to fit uint16_t
1079 :param password: mutually exclusive with ``key``
1080 :type password: bytes
1081 :param key: mutually exclusive with ``password``
1084 :type counter: initial object counter the values
1085 ``AES_GCM_IV_CNT_INFOFILE`` and
1086 ``AES_GCM_IV_CNT_INDEX`` are unique in each backup set
1087 and cannot be reused even with different fixed parts.
1088 :type strict_ivs: bool
1090 if password is None and key is None \
1091 or password is not None and key is not None :
1092 raise InvalidParameter ("__init__: need either key or password")
1095 if isinstance (key, bytes) is False:
1096 raise InvalidParameter ("__init__: key must be provided as "
1097 "bytes, not %s" % type (key))
1099 raise InvalidParameter ("__init__: salt must be provided along "
1100 "with encryption key")
1101 else: # password, no key
1102 if isinstance (password, str) is False:
1103 raise InvalidParameter ("__init__: password must be a string, not %s"
1105 if len (password) == 0:
1106 raise InvalidParameter ("__init__: supplied empty password but not "
1107 "permitted for PDT encrypted files")
1109 if isinstance (version, int) is False:
1110 raise InvalidParameter ("__init__: version number must be an "
1111 "integer, not %s" % type (version))
1113 raise InvalidParameter ("__init__: version number must be a "
1114 "nonnegative integer, not %d" % version)
1116 if isinstance (paramversion, int) is False:
1117 raise InvalidParameter ("__init__: crypto parameter version number "
1118 "must be an integer, not %s"
1119 % type (paramversion))
1120 if paramversion < 0:
1121 raise InvalidParameter ("__init__: crypto parameter version number "
1122 "must be a nonnegative integer, not %d"
1125 if nacl is not None:
1126 if isinstance (nacl, bytes) is False:
1127 raise InvalidParameter ("__init__: salt given, but of type %s "
1128 "instead of bytes" % type (nacl))
1129 # salt length would depend on the actual encryption so it can’t be
1130 # validated at this point
1132 self.version = version
1133 self.paramenc = ENCRYPTION_PARAMETERS.get (paramversion) ["enc"]
1135 super().__init__ (password, key, paramversion, nacl, counter=counter,
1136 strict_ivs=strict_ivs)
1139 def next_fixed (self, retries=PDTCRYPT_IV_GEN_MAX_RETRIES):
1141 Generate the next IV fixed part by reading eight bytes from
1142 ``/dev/urandom``. The buffer so obtained is tested against the fixed
1143 parts used so far to prevent accidental reuse of IVs. After a
1144 configurable number of attempts to create a unique fixed part, it will
1145 refuse to continue with an ``IVFixedPartError``. This is unlikely to
1146 ever happen on a normal system but may detect an issue with the random
1149 The list of fixed parts that were used by the context at hand can be
1150 accessed through the ``.fixed`` list. Its last element is the fixed
1151 part currently in use.
1155 fp = os.urandom (PDTCRYPT_IV_FIXEDPART_SIZE)
1156 if fp not in self.fixed:
1157 self.fixed.append (fp)
1160 raise IVFixedPartError ("error obtaining a unique IV fixed part from "
1161 "/dev/urandom; giving up after %d tries" % i)
1166 Construct a 12-bytes IV from the current fixed part and the object
1169 return struct.pack(FMT_I2N_IV, self.fixed [-1], self.cnt)
1172 def next (self, filename=None, counter=None):
1174 Prepare for encrypting the next incoming object. Update the counter
1175 and put together the IV, possibly changing prefixes. Then create the
1178 The argument ``counter`` can be used to specify a file counter for this
1179 object. Unless it is one of the reserved values, the counter of
1180 subsequent objects will be computed from this one.
1182 If this is the first object in a series, ``filename`` is required,
1183 otherwise it is reused if not present. The value is used to derive a
1184 header sized placeholder to use until after encryption when all the
1185 inputs to construct the final header are available. This is then
1186 matched in ``.done()`` against the value found at the position of the
1187 header. The motivation for this extra check is primarily to assist
1188 format debugging: It makes stray headers easy to spot in malformed
1191 if filename is None:
1192 if self.lastinfo is None:
1193 raise InvalidParameter ("next: filename is mandatory for "
1195 filename, _dummy = self.lastinfo
1197 if isinstance (filename, str) is False:
1198 raise InvalidParameter ("next: filename must be a string, no %s"
1200 if counter is not None:
1201 if isinstance (counter, int) is False:
1202 raise InvalidParameter ("next: the supplied counter is of "
1203 "invalid type %s; please pass an "
1204 "integer instead" % type (counter))
1205 self.set_object_counter (counter)
1207 self.iv = self.iv_make ()
1208 if self.paramenc == "aes-gcm":
1210 ( algorithms.AES (self.key)
1211 , modes.GCM (self.iv)
1212 , backend = default_backend ()) \
1214 elif self.paramenc == "passthrough":
1215 self.enc = PassthroughCipher ()
1217 raise InvalidParameter ("next: parameter version %d not known"
1218 % self.paramversion)
1219 hdrdum = hdr_make_dummy (filename)
1220 self.lastinfo = (filename, hdrdum)
1221 super().next (self.password, self.paramversion, self.nacl, self.iv)
1223 self.set_object_counter (self.cnt + 1)
1227 def done (self, cmpdata):
1229 Complete encryption of an object. After this has been called, attempts
1230 of encrypting further data will cause an error until ``.next()`` is
1233 Returns a 64 bytes buffer containing the object header including all
1234 values including the “late” ones e. g. the ciphertext size and the
1237 if isinstance (cmpdata, bytes) is False:
1238 raise InvalidParameter ("done: comparison input expected as bytes, "
1239 "not %s" % type (cmpdata))
1240 if self.lastinfo is None:
1241 raise RuntimeError ("done: encryption context not initialized")
1242 filename, hdrdum = self.lastinfo
1243 if cmpdata != hdrdum:
1244 raise RuntimeError ("done: bad sync of header for object %d: "
1245 "preliminary data does not match; this likely "
1246 "indicates a wrongly repositioned stream"
1248 data = self.enc.finalize ()
1249 self.stats ["out"] += len (data)
1250 self.ctsize += len (data)
1251 ok, hdr = hdr_from_params (self.version, self.paramversion, self.nacl,
1252 self.iv, self.ctsize, self.enc.tag)
1254 raise InternalError ("error constructing header: %r" % hdr)
1255 return data, hdr, self.fixed
1258 def process (self, buf):
1260 Encrypt a chunk of plaintext with the active encryptor. Returns the
1261 size of the input consumed. This **must** be checked downstream. If the
1262 maximum possible object size has been reached, the current context must
1263 be finalized and a new one established before any further data can be
1264 encrypted. The second argument is the remainder of the plaintext that
1265 was not encrypted for the caller to use immediately after the new
1268 if isinstance (buf, bytes) is False:
1269 raise InvalidParameter ("process: expected byte buffer, not %s"
1272 newptsize = self.ptsize + bsize
1273 diff = newptsize - PDTCRYPT_MAX_OBJ_SIZE
1276 newptsize = PDTCRYPT_MAX_OBJ_SIZE
1277 self.ptsize = newptsize
1278 data = super().process (buf [:bsize])
1279 self.ctsize += len (data)
1283 class Decrypt (Crypto):
1285 tag = None # GCM tag, part of header
1286 last_iv = None # check consecutive ivs in strict mode
1288 def __init__ (self, password=None, key=None, counter=None, fixedparts=None,
1291 Sanitizing ctor for the decryption context. ``fixedparts`` specifies a
1292 list of IV fixed parts accepted during decryption. If a fixed part is
1293 encountered that is not in the list, decryption will fail.
1295 :param password: mutually exclusive with ``key``
1296 :type password: bytes
1297 :param key: mutually exclusive with ``password``
1299 :type counter: initial object counter the values
1300 ``AES_GCM_IV_CNT_INFOFILE`` and
1301 ``AES_GCM_IV_CNT_INDEX`` are unique in each backup set
1302 and cannot be reused even with different fixed parts.
1303 :type fixedparts: bytes list
1305 if password is None and key is None \
1306 or password is not None and key is not None :
1307 raise InvalidParameter ("__init__: need either key or password")
1310 if isinstance (key, bytes) is False:
1311 raise InvalidParameter ("__init__: key must be provided as "
1312 "bytes, not %s" % type (key))
1313 else: # password, no key
1314 if isinstance (password, str) is False:
1315 raise InvalidParameter ("__init__: password must be a string, not %s"
1317 if len (password) == 0:
1318 raise InvalidParameter ("__init__: supplied empty password but not "
1319 "permitted for PDT encrypted files")
1321 if fixedparts is not None:
1322 if isinstance (fixedparts, list) is False:
1323 raise InvalidParameter ("__init__: IV fixed parts must be "
1324 "supplied as list, not %s"
1325 % type (fixedparts))
1326 self.fixed = fixedparts
1329 super().__init__ (password=password, key=key, counter=counter,
1330 strict_ivs=strict_ivs)
def valid_fixed_part (self, iv):
    """
    Tell whether the fixed part of ``iv`` is among the accepted ones.

    The IV is taken apart according to ``FMT_I2N_IV``; the first field is
    then looked up in ``self.fixed`` by binary search (``bisect``), which
    presupposes that the list is sorted.

    :param iv: packed IV as understood by ``FMT_I2N_IV``
    :returns:  ``True`` if the fixed part is an element of ``self.fixed``,
               ``False`` otherwise
    """
    prefix, _counter = struct.unpack (FMT_I2N_IV, iv)
    known = self.fixed
    pos = bisect.bisect_left (known, prefix)
    if pos == len (known): # insertion point past the end: not present
        return False
    return known [pos] == prefix
def check_consecutive_iv (self, iv):
    """
    Check whether the counter part of the given IV is indeed the successor
    of the currently present counter. This should always be the case for
    the objects in a well formed PDT archive but should not be enforced
    when decrypting out-of-order.

    The check only applies if ``self.strict_ivs`` is ``True`` and the
    previously seen IV carried the same fixed part. In every non-failing
    case the IV at hand is remembered in ``self.last_iv`` as a
    ``(fixed, counter)`` pair for the next call.

    :param iv: packed IV as understood by ``FMT_I2N_IV``
    :raises NonConsecutiveIV: strict mode is on and the counter is not the
                              previous counter plus one
    """
    fixed, cnt = struct.unpack (FMT_I2N_IV, iv)
    if self.strict_ivs is True \
            and self.last_iv is not None \
            and self.last_iv [0] == fixed \
            and self.last_iv [1] != cnt - 1:
        # the value we expected is the successor of the last counter; it
        # must be passed as a number to satisfy the %d conversion
        raise NonConsecutiveIV ("iv %s counter not successor of "
                                "last object (expected %d, found %d)"
                                % (fixed, self.last_iv [1] + 1, cnt))
    self.last_iv = (fixed, cnt)
1361 def next (self, hdr):
1363 Start decrypting the next object. The PDTCRYPT header for the object
1364 can be given either as already parsed object or as bytes.
1366 if isinstance (hdr, bytes) is True:
1367 hdr = hdr_read (hdr)
1368 elif isinstance (hdr, dict) is False:
1369 # this won’t catch malformed specs though
1370 raise InvalidParameter ("next: wrong type of parameter hdr: "
1371 "expected bytes or spec, got %s"
1374 paramversion = hdr ["paramversion"]
1379 raise InvalidHeader ("next: not a header %r" % hdr)
1381 super().next (self.password, paramversion, nacl, iv)
1382 if self.fixed is not None and self.valid_fixed_part (iv) is False:
1383 raise InvalidIVFixedPart ("iv %s has invalid fixed part"
1385 self.check_consecutive_iv (iv)
1388 defs = ENCRYPTION_PARAMETERS.get (paramversion, None)
1390 raise FormatError ("header contains unknown parameter version %d; "
1391 "maybe the file was created by a more recent "
1392 "version of Deltatar" % paramversion)
1394 if enc == "aes-gcm":
1396 ( algorithms.AES (self.key)
1397 , modes.GCM (iv, tag=self.tag)
1398 , backend = default_backend ()) \
1400 elif enc == "passthrough":
1401 self.enc = PassthroughCipher ()
1403 raise InternalError ("encryption parameter set %d refers to unknown "
1404 "mode %r" % (paramversion, enc))
1405 self.set_object_counter (self.cnt + 1)
def done (self, tag=None):
    """
    Stop decryption of the current object and finalize it with the active
    context. This will throw an *InvalidGCMTag* exception to indicate that
    the authentication tag does not match the data. If the tag is correct,
    the rest of the plaintext is returned.

    The size of the returned data is added to ``self.ctsize`` and to the
    ``"out"`` statistic.

    :param tag: optional authentication tag; if given, the context is
                finalized with exactly this value instead of plain
                ``finalize ()``
    :type  tag: bytes
    :raises InvalidParameter: ``tag`` was supplied but is not ``bytes``
    :raises InvalidGCMTag:    the backend rejected the tag
    """
    try:
        if tag is None:
            data = self.enc.finalize ()
        else:
            if isinstance (tag, bytes) is False:
                raise InvalidParameter ("done: wrong type of parameter "
                                        "tag: expected bytes, got %s"
                                        % type (tag))
            # finalize with the tag the caller handed in, not with the one
            # stored on the context
            data = self.enc.finalize_with_tag (tag)
    except cryptography.exceptions.InvalidTag:
        # report the tag that was actually checked
        checked = self.tag if tag is None else tag
        raise InvalidGCMTag ("done: tag mismatch of object %d: %s "
                             "rejected by finalize ()"
                             % (self.cnt, binascii.hexlify (checked)))
    self.ctsize += len (data)
    self.stats ["out"] += len (data)
    return data
def process (self, buf):
    """
    Run the ciphertext chunk *buf* through the active decryptor and hand
    back the resulting plaintext.

    ``self.ctsize`` grows by the size of the input, ``self.ptsize`` by the
    size of the output.

    :type buf: bytes
    :raises InvalidParameter: *buf* is not a ``bytes`` object
    """
    if not isinstance (buf, bytes):
        raise InvalidParameter ("process: expected byte buffer, not %s"
                                % type (buf))
    self.ctsize = self.ctsize + len (buf)
    plain = super().process (buf)
    self.ptsize = self.ptsize + len (plain)
    return plain
1447 ###############################################################################
1449 ###############################################################################
def _patch_global (glob, vow, n=None):
    """
    Adapt upper file counter bound for testing IV logic. Completely unsafe.

    Overwrites the module level global named ``glob``. If ``n`` is omitted,
    the value of the global ``<glob>_DEFAULT`` is written instead.

    :param glob: name of the module global to replace
    :param vow:  must equal the warranty sentence checked below
    :param n:    new value; ``None`` selects ``<glob>_DEFAULT``
    :returns:    the value the global held before the call
    :raises AssertionError: the vow was not spoken correctly
    """
    # raised explicitly so the guard also holds when asserts are compiled
    # out (``python -O``)
    if vow != "I am fully aware that this will void my warranty.":
        raise AssertionError ("refusing to patch %s: wrong vow" % glob)
    r = globals () [glob]
    if n is None:
        n = globals () [glob + "_DEFAULT"]
    globals () [glob] = n
    return r

# testing hooks; each expects the vow and, optionally, the new value
_testing_set_AES_GCM_IV_CNT_MAX = \
        partial (_patch_global, "AES_GCM_IV_CNT_MAX")

_testing_set_PDTCRYPT_MAX_OBJ_SIZE = \
        partial (_patch_global, "PDTCRYPT_MAX_OBJ_SIZE")
1468 def open2_dump_file (fname, dir_fd, force=False):
1471 oflags = os.O_CREAT | os.O_WRONLY
1473 oflags |= os.O_TRUNC
1478 outfd = os.open (fname, oflags,
1479 stat.S_IRUSR | stat.S_IWUSR, dir_fd=dir_fd)
1480 except FileExistsError as exn:
1481 noise ("PDT: refusing to overwrite existing file %s" % fname)
1483 raise RuntimeError ("destination file %s already exists" % fname)
1484 if PDTCRYPT_VERBOSE is True:
1485 noise ("PDT: new output file %s (fd=%d)" % (fname, outfd))
1489 ###############################################################################
1490 ## freestanding invocation
1491 ###############################################################################
1493 PDTCRYPT_SUB_PROCESS = 0
1494 PDTCRYPT_SUB_SCRYPT = 1
1495 PDTCRYPT_SUB_SCAN = 2
1498 { "process" : PDTCRYPT_SUB_PROCESS
1499 , "scrypt" : PDTCRYPT_SUB_SCRYPT
1500 , "scan" : PDTCRYPT_SUB_SCAN }
1502 PDTCRYPT_DECRYPT = 1 << 0 # decrypt archive with password
1503 PDTCRYPT_SPLIT = 1 << 1 # split archive into individual objects
1504 PDTCRYPT_HASH = 1 << 2 # output scrypt hash for file and given password
1506 PDTCRYPT_SPLITNAME = "pdtcrypt-object-%d.bin"
1507 PDTCRYPT_RESCUENAME = "pdtcrypt-rescue-object-%0.5d.bin"
1509 PDTCRYPT_VERBOSE = False
1510 PDTCRYPT_STRICTIVS = False
1511 PDTCRYPT_OVERWRITE = False
1512 PDTCRYPT_BLOCKSIZE = 1 << 12
1517 PDTCRYPT_DEFAULT_VER = 1
1518 PDTCRYPT_DEFAULT_PVER = 1
1520 # scrypt hashing output control
1521 PDTCRYPT_SCRYPT_INTRANATOR = 0
1522 PDTCRYPT_SCRYPT_PARAMETERS = 1
1523 PDTCRYPT_SCRYPT_DEFAULT = PDTCRYPT_SCRYPT_INTRANATOR
1525 PDTCRYPT_SCRYPT_FORMAT = \
1526 { "i2n" : PDTCRYPT_SCRYPT_INTRANATOR
1527 , "params" : PDTCRYPT_SCRYPT_PARAMETERS }
1529 PDTCRYPT_TT_COLUMNS = 80 # assume standard terminal
class PDTDecryptionError (Exception):
    """
    Raised by the freestanding decryption code when the input cannot be
    processed, e. g. on account of a bad header or a byte count mismatch.
    """
class PDTSplitError (Exception):
    """
    Splitting the archive into per-object files failed, e. g. because an
    output file could not be created in the destination directory.
    """
def noise (*a, **b):
    """
    Print a diagnostic message. All arguments are handed on to ``print ()``;
    the output goes to ``sys.stderr`` unless the caller passes a ``file``
    keyword argument of their own.
    """
    # only a default: an explicit file= must not clash with ours
    b.setdefault ("file", sys.stderr)
    print (*a, **b)
1542 class PassthroughDecryptor (object):
1544 curhdr = None # write current header on first data write
1546 def __init__ (self):
1547 if PDTCRYPT_VERBOSE is True:
1548 noise ("PDT: no encryption; data passthrough")
1550 def next (self, hdr):
1551 ok, curhdr = hdr_make (hdr)
1553 raise PDTDecryptionError ("bad header %r" % hdr)
1554 self.curhdr = curhdr
1557 if self.curhdr is not None:
1561 def process (self, d):
1562 if self.curhdr is not None:
1568 def depdtcrypt (mode, secret, ins, outs):
1570 Remove PDTCRYPT layer from all objects encrypted with the secret. Used on a
1571 Deltatar backup this will yield a (possibly Gzip compressed) tarball.
1573 ctleft = -1 # length of ciphertext to consume
1574 ctcurrent = 0 # total ciphertext of current object
1575 total_obj = 0 # total number of objects read
1576 total_pt = 0 # total plaintext bytes
1577 total_ct = 0 # total ciphertext bytes
1578 total_read = 0 # total bytes read
1579 outfile = None # Python file object for output
1581 if mode & PDTCRYPT_DECRYPT: # decryptor
1583 if ks == PDTCRYPT_SECRET_PW:
1584 decr = Decrypt (password=secret [1], strict_ivs=PDTCRYPT_STRICTIVS)
1585 elif ks == PDTCRYPT_SECRET_KEY:
1587 decr = Decrypt (key=key, strict_ivs=PDTCRYPT_STRICTIVS)
1589 raise InternalError ("‘%d’ does not specify a valid kind of secret"
1592 decr = PassthroughDecryptor ()
1595 """Dummy for non-split mode: output file does not vary."""
1598 if mode & PDTCRYPT_SPLIT:
1599 def nextout (outfile):
1601 We were passed an fd as outs for accessing the destination
1602 directory where extracted archive components are supposed
1607 if PDTCRYPT_VERBOSE is True:
1608 noise ("PDT: no output file to close at this point")
1610 if PDTCRYPT_VERBOSE is True:
1611 noise ("PDT: release output file %r" % outfile)
1612 # cleanup happens automatically by the GC; the next
1613 # line will error out on account of an invalid fd
1616 assert total_obj > 0
1617 fname = PDTCRYPT_SPLITNAME % total_obj
1619 outfd = open2_dump_file (fname, outs, force=PDTCRYPT_OVERWRITE)
1620 except RuntimeError as exn:
1621 raise PDTSplitError (exn)
1622 return os.fdopen (outfd, "wb", closefd=True)
1626 """ESPIPE is normal on non-seekable stdio stream."""
1629 except OSError as exn:
1630 if exn.errno == errno.ESPIPE:
1633 def out (pt, outfile):
1637 if PDTCRYPT_VERBOSE is True:
1638 noise ("PDT:\t· decrypt plaintext %d B" % (npt))
1640 nn = outfile.write (pt)
1641 except OSError as exn: # probably ENOSPC
1642 raise DecryptionError ("error (%s)" % exn)
1644 raise DecryptionError ("write aborted after %d of %d B" % (nn, npt))
1648 # current object completed; in a valid archive this marks either
1649 # the start of a new header or the end of the input
1650 if ctleft == 0: # current object requires finalization
1651 if PDTCRYPT_VERBOSE is True:
1652 noise ("PDT: %d finalize" % tell (ins))
1655 except InvalidGCMTag as exn:
1656 raise DecryptionError ("error finalizing object %d (%d B): "
1657 "%r" % (total_obj, len (pt), exn)) \
1660 if PDTCRYPT_VERBOSE is True:
1661 noise ("PDT:\t· object validated")
1663 if PDTCRYPT_VERBOSE is True:
1664 noise ("PDT: %d hdr" % tell (ins))
1666 hdr = hdr_read_stream (ins)
1667 total_read += PDTCRYPT_HDR_SIZE
1668 except EndOfFile as exn:
1669 total_read += exn.remainder
1670 if total_ct + total_obj * PDTCRYPT_HDR_SIZE != total_read:
1671 raise PDTDecryptionError ("ciphertext processed (%d B) plus "
1672 "overhead (%d × %d B) does not match "
1673 "the number of bytes read (%d )"
1674 % (total_ct, total_obj, PDTCRYPT_HDR_SIZE,
1676 # the single good exit
1677 return total_read, total_obj, total_ct, total_pt
1678 except InvalidHeader as exn:
1679 raise PDTDecryptionError ("invalid header at position %d in %r "
1680 "(%s)" % (tell (ins), exn, ins))
1681 if PDTCRYPT_VERBOSE is True:
1682 pretty = hdr_fmt_pretty (hdr)
1683 noise (reduce (lambda a, e: (a + "\n" if a else "") + "PDT:\t· " + e,
1684 pretty.splitlines (), ""))
1685 ctcurrent = ctleft = hdr ["ctsize"]
1689 total_obj += 1 # used in file counter with split mode
1691 # finalization complete or skipped in case of first object in
1692 # stream; create a new output file if necessary
1693 outfile = nextout (outfile)
1695 if PDTCRYPT_VERBOSE is True:
1696 noise ("PDT: %d decrypt obj no. %d, %d B"
1697 % (tell (ins), total_obj, ctleft))
1699 # always allocate a new buffer since python-cryptography doesn’t allow
1700 # passing a bytearray :/
1701 nexpect = min (ctleft, PDTCRYPT_BLOCKSIZE)
1702 if PDTCRYPT_VERBOSE is True:
1703 noise ("PDT:\t· [%d] %d%% done, read block (%d B of %d B remaining)"
1705 100 - ctleft * 100 / (ctcurrent > 0 and ctcurrent or 1),
1707 ct = ins.read (nexpect)
1711 raise EndOfFile (nct,
1712 "hit EOF after %d of %d B in block [%d:%d); "
1713 "%d B ciphertext remaining for object no %d"
1714 % (nct, nexpect, off, off + nexpect, ctleft,
1720 if PDTCRYPT_VERBOSE is True:
1721 noise ("PDT:\t· decrypt ciphertext %d B" % (nct))
1722 pt = decr.process (ct)
def deptdcrypt_mk_stream (kind, path):
    """
    Open the stream designated by ``path`` for the given direction.

    A ``path`` of ``"-"`` selects the binary buffer of stdout (sink) or
    stdin (source); anything else is opened as an ``io.FileIO`` for
    writing or reading, respectively.

    :param kind: ``PDTCRYPT_SINK`` or ``PDTCRYPT_SOURCE``
    :raises ValueError: ``kind`` is neither of the two
    """
    verbose = PDTCRYPT_VERBOSE is True

    if kind == PDTCRYPT_SINK:
        if path == "-":
            if verbose:
                noise ("PDT: sink: stdout")
            return sys.stdout.buffer
        if verbose:
            noise ("PDT: sink: file %s" % path)
        return io.FileIO (path, "w")

    if kind == PDTCRYPT_SOURCE:
        if path == "-":
            if verbose:
                noise ("PDT: source: stdin")
            return sys.stdin.buffer
        if verbose:
            noise ("PDT: source: file %s" % path)
        return io.FileIO (path, "r")

    raise ValueError ("bogus stream “%s” / %s" % (kind, path))
1746 def mode_depdtcrypt (mode, secret, ins, outs):
1748 total_read, total_obj, total_ct, total_pt = \
1749 depdtcrypt (mode, secret, ins, outs)
1750 except DecryptionError as exn:
1751 noise ("PDT: Decryption failed:")
1753 noise ("PDT: “%s”" % exn)
1755 noise ("PDT: Did you specify the correct key / password?")
1758 except PDTSplitError as exn:
1759 noise ("PDT: Split operation failed:")
1761 noise ("PDT: “%s”" % exn)
1763 noise ("PDT: Hint: target directory should be empty.")
1767 if PDTCRYPT_VERBOSE is True:
1768 noise ("PDT: decryption successful" )
1769 noise ("PDT: %.10d bytes read" % total_read)
1770 noise ("PDT: %.10d objects decrypted" % total_obj )
1771 noise ("PDT: %.10d bytes ciphertext" % total_ct )
1772 noise ("PDT: %.10d bytes plaintext" % total_pt )
1778 def mode_scrypt (pw, ins=None, nacl=None, fmt=PDTCRYPT_SCRYPT_INTRANATOR):
1780 paramversion = PDTCRYPT_DEFAULT_PVER
1782 hsh, nacl, version, paramversion = scrypt_hashsource (pw, ins)
1783 defs = ENCRYPTION_PARAMETERS.get(paramversion, None)
1785 nacl = binascii.unhexlify (nacl)
1786 defs = ENCRYPTION_PARAMETERS.get(paramversion, None)
1787 version = PDTCRYPT_DEFAULT_VER
1789 kdfname, params = defs ["kdf"]
1791 kdf = kdf_by_version (None, defs)
1792 hsh, _void = kdf (pw, nacl)
1796 if fmt == PDTCRYPT_SCRYPT_INTRANATOR:
1797 out = json.dumps ({ "salt" : base64.b64encode (nacl).decode ()
1798 , "key" : base64.b64encode (hsh) .decode ()
1799 , "paramversion" : paramversion })
1800 elif fmt == PDTCRYPT_SCRYPT_PARAMETERS:
1801 out = json.dumps ({ "salt" : binascii.hexlify (nacl).decode ()
1802 , "key" : binascii.hexlify (hsh) .decode ()
1803 , "version" : version
1804 , "scrypt_params" : { "N" : params ["N"]
1805 , "r" : params ["r"]
1806 , "p" : params ["p"]
1807 , "dkLen" : params ["dkLen"] } })
1809 raise RuntimeError ("bad scrypt output scheme %r" % fmt)
1814 def noise_output_candidates (cands, indent=8, cols=PDTCRYPT_TT_COLUMNS):
1816 Print a list of offsets without garbling the terminal too much.
1818 The indent is counted from column zero; if it is wide enough, the “PDT: ”
1819 marker will be prepended, considered part of the indentation.
1823 idt = " " * indent if indent < 5 else "PDT: " + " " * (indent - 5)
1828 init = True # prevent leading separator
1831 raise ValueError ("the requested indentation exceeds the line "
1832 "width by %d" % (indent - wd))
1842 if lpos > wd: # line break
SLICE_START = 1 # ordering matters: at equal offsets an end (0) sorts before
SLICE_END   = 0 # a start (1), so slices that merely touch do not overlap

def find_overlaps (slices):
    """
    Find overlapping slices: iterate open/close points of intervals, tracking
    the ones open at any time.

    Slices are ``(start, end)`` pairs treated as half-open intervals. Empty
    or inverted slices (``start >= end``) cover nothing and are ignored; they
    would otherwise be closed before being opened.

    :param slices: sequence of ``(start, end)`` pairs
    :returns:      list of those slices that overlap at least one other
                   slice, in the order in which they appear in ``slices``
    """
    bounds = []
    for i, s in enumerate (slices):
        if s [0] >= s [1]: # nothing covered, cannot overlap
            continue
        bounds.append ((s [0], SLICE_START, i))
        bounds.append ((s [1], SLICE_END  , i))
    bounds.sort ()

    inside = set () # indices of slices currently open
    ovrlp  = set () # indices of slices known to overlap

    for _pos, kind, i in bounds:
        if kind == SLICE_START:
            inside.add (i)
        else:
            if len (inside) > 1: # closing one that overlapped
                ovrlp |= inside
            inside.remove (i)

    return [ slices [i] for i in sorted (ovrlp) ]
1887 def mode_scan (secret, fname, outs=None, nacl=None):
1889 Dissect a binary file, looking for PDTCRYPT headers and objects.
1891 If *outs* is supplied, recoverable data will be dumped into the specified
1895 ifd = os.open (fname, os.O_RDONLY)
1896 except FileNotFoundError:
1897 noise ("PDT: failed to open %s readonly" % fname)
1902 if PDTCRYPT_VERBOSE is True:
1903 noise ("PDT: scan for potential sync points")
1904 cands = locate_hdr_candidates (ifd)
1905 if len (cands) == 0:
1906 noise ("PDT: scan complete: input does not contain potential PDT "
1907 "headers; giving up.")
1909 if PDTCRYPT_VERBOSE is True:
1910 noise ("PDT: scan complete: found %d candidates:" % len (cands))
1911 noise_output_candidates (cands)
1916 junk, todo, slices = [], [], []
1921 vdt, hdr = inspect_hdr (ifd, cand)
1923 vdts = verdict_fmt (vdt)
1925 if vdt == HDR_CAND_JUNK:
1926 noise ("PDT: obj %d: %s object: bad header, skipping" % vdts)
1929 off0 = cand + PDTCRYPT_HDR_SIZE
1930 if PDTCRYPT_VERBOSE is True:
1931 noise ("PDT: obj %d: read payload @%d" % (nobj, off0))
1932 pretty = hdr_fmt_pretty (hdr)
1933 noise (reduce (lambda a, e: (a + "\n" if a else "") + "PDT:\t· " + e,
1934 pretty.splitlines (), ""))
1937 if outs is not None:
1938 ofname = PDTCRYPT_RESCUENAME % nobj
1939 ofd = open2_dump_file (ofname, outs, force=PDTCRYPT_OVERWRITE)
1941 ctsize = hdr ["ctsize"]
1943 l = try_decrypt (ifd, off0, hdr, secret, ofd=ofd)
1945 slices.append ((off0, off0 + l))
1949 if vdt == HDR_CAND_GOOD and ok is True:
1950 noise ("PDT: %d → ✓ %s object %d–%d"
1951 % (cand, vdts, off0, off0 + ctsize))
1952 elif vdt == HDR_CAND_FISHY and ok is True:
1953 noise ("PDT: %d → × %s object %d–%d, corrupt header"
1954 % (cand, vdts, off0, off0 + ctsize))
1955 elif vdt == HDR_CAND_GOOD and ok is False:
1956 noise ("PDT: %d → × %s object %d–%d, problematic payload"
1957 % (cand, vdts, off0, off0 + ctsize))
1958 elif vdt == HDR_CAND_FISHY and ok is False:
1959 noise ("PDT: %d → × %s object %d–%d, corrupt header, problematic "
1960 "ciphertext" % (cand, vdts, off0, off0 + ctsize))
1967 noise ("PDT: all headers ok")
1969 noise ("PDT: %d candidates not parseable as headers:" % len (junk))
1970 noise_output_candidates (junk)
1972 overlap = find_overlaps (slices)
1973 if len (overlap) > 0:
1974 noise ("PDT: %d objects overlapping others" % len (overlap))
1975 for slice in overlap:
1976 noise ("PDT: × %d→%d" % (slice [0], slice [1]))
1978 def usage (err=False):
1982 indent = ' ' * len (SELF)
1983 out ("usage: %s SUBCOMMAND { --help" % SELF)
1984 out (" %s | [ -v ] { -p PASSWORD | -k KEY }" % indent)
1985 out (" %s [ { -i | --in } { - | SOURCE } ]" % indent)
1986 out (" %s [ { -n | --nacl } { SALT } ]" % indent)
1987 out (" %s [ { -o | --out } { - | DESTINATION } ]" % indent)
1988 out (" %s [ -D | --no-decrypt ] [ -S | --split ]" % indent)
1989 out (" %s [ -f | --format ]" % indent)
1992 out ("\t\tSUBCOMMAND main mode: { process | scrypt }")
1994 out ("\t\t process: extract objects from PDT archive")
1995 out ("\t\t scrypt: calculate hash from password and first object")
1996 out ("\t\t-p PASSWORD password to derive the encryption key from")
1997 out ("\t\t-k KEY encryption key as 16 bytes in hexadecimal notation")
1998 out ("\t\t-s enforce strict handling of initialization vectors")
1999 out ("\t\t-i SOURCE file name to read from")
2000 out ("\t\t-o DESTINATION file to write output to")
2001 out ("\t\t-n SALT provide salt for scrypt mode in hex encoding")
2002 out ("\t\t-v print extra info")
2003 out ("\t\t-S split into files at object boundaries; this")
2004 out ("\t\t requires DESTINATION to refer to directory")
2005 out ("\t\t-D PDT header and ciphertext passthrough")
2006 out ("\t\t-f format of SCRYPT hash output (“default” or “parameters”)")
2008 out ("\tinstead of filenames, “-” may used to specify stdin / stdout")
2010 sys.exit ((err is True) and 42 or 0)
2020 def parse_argv (argv):
2021 global PDTCRYPT_OVERWRITE
2023 mode = PDTCRYPT_DECRYPT
2029 scrypt_format = PDTCRYPT_SCRYPT_DEFAULT
2032 SELF = os.path.basename (next (argvi))
2035 rawsubcmd = next (argvi)
2036 subcommand = PDTCRYPT_SUB [rawsubcmd]
2037 except StopIteration:
2038 bail ("ERROR: subcommand required")
2040 bail ("ERROR: invalid subcommand “%s” specified" % rawsubcmd)
2046 except StopIteration:
2047 bail ("ERROR: argument list incomplete")
2049 def checked_secret (s):
2054 bail ("ERROR: encountered “%s” but secret already given" % arg)
2057 if arg in [ "-h", "--help" ]:
2060 elif arg in [ "-v", "--verbose", "--wtf" ]:
2061 global PDTCRYPT_VERBOSE
2062 PDTCRYPT_VERBOSE = True
2063 elif arg in [ "-i", "--in", "--source" ]:
2064 insspec = checked_arg ()
2065 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypt from %s" % insspec)
2066 elif arg in [ "-p", "--password" ]:
2067 arg = checked_arg ()
2068 checked_secret (make_secret (password=arg))
2069 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypting with password")
2071 if subcommand == PDTCRYPT_SUB_PROCESS:
2072 if arg in [ "-s", "--strict-ivs" ]:
2073 global PDTCRYPT_STRICTIVS
2074 PDTCRYPT_STRICTIVS = True
2075 elif arg in [ "-o", "--out", "--dest", "--sink" ]:
2076 outsspec = checked_arg ()
2077 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypt to %s" % outsspec)
2078 elif arg in [ "-f", "--force" ]:
2079 PDTCRYPT_OVERWRITE = True
2080 if PDTCRYPT_VERBOSE is True: noise ("PDT: overwrite existing files")
2081 elif arg in [ "-S", "--split" ]:
2082 mode |= PDTCRYPT_SPLIT
2083 if PDTCRYPT_VERBOSE is True: noise ("PDT: split files")
2084 elif arg in [ "-D", "--no-decrypt" ]:
2085 mode &= ~PDTCRYPT_DECRYPT
2086 if PDTCRYPT_VERBOSE is True: noise ("PDT: not decrypting")
2087 elif arg in [ "-k", "--key" ]:
2088 arg = checked_arg ()
2089 checked_secret (make_secret (key=arg))
2090 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypting with key")
2092 bail ("ERROR: unexpected positional argument “%s”" % arg)
2093 elif subcommand == PDTCRYPT_SUB_SCRYPT:
2094 if arg in [ "-n", "--nacl", "--salt" ]:
2095 nacl = checked_arg ()
2096 if PDTCRYPT_VERBOSE is True: noise ("PDT: salt key with %s" % nacl)
2097 elif arg in [ "-f", "--format" ]:
2098 arg = checked_arg ()
2100 scrypt_format = PDTCRYPT_SCRYPT_FORMAT [arg]
2102 bail ("ERROR: invalid scrypt output format %s" % arg)
2103 if PDTCRYPT_VERBOSE is True:
2104 noise ("PDT: scrypt output format “%s”" % scrypt_format)
2106 bail ("ERROR: unexpected positional argument “%s”" % arg)
2107 elif subcommand == PDTCRYPT_SUB_SCAN:
2108 if arg in [ "-o", "--out", "--dest", "--sink" ]:
2109 outsspec = checked_arg ()
2110 if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypt to %s" % outsspec)
2111 elif arg in [ "-f", "--force" ]:
2112 PDTCRYPT_OVERWRITE = True
2113 if PDTCRYPT_VERBOSE is True: noise ("PDT: overwrite existing files")
2115 bail ("ERROR: unexpected positional argument “%s”" % arg)
2118 if PDTCRYPT_VERBOSE is True:
2119 noise ("ERROR: no password or key specified, trying $PDTCRYPT_PASSWORD")
2120 epw = os.getenv ("PDTCRYPT_PASSWORD")
2122 checked_secret (make_secret (password=epw.strip ()))
2125 if PDTCRYPT_VERBOSE is True:
2126 noise ("ERROR: no password or key specified, trying $PDTCRYPT_KEY")
2127 ek = os.getenv ("PDTCRYPT_KEY")
2129 checked_secret (make_secret (key=ek.strip ()))
2132 if subcommand == PDTCRYPT_SUB_SCRYPT:
2133 bail ("ERROR: scrypt hash mode requested but no password given")
2134 elif mode & PDTCRYPT_DECRYPT:
2135 bail ("ERROR: decryption requested but no password given")
2137 if mode & PDTCRYPT_SPLIT and outsspec is None:
2138 bail ("ERROR: split mode is incompatible with stdout sink "
2141 if subcommand == PDTCRYPT_SUB_SCAN and outsspec is None:
2142 pass # no output by default in scan mode
2143 elif mode & PDTCRYPT_SPLIT or subcommand == PDTCRYPT_SUB_SCAN:
2144 # destination must be directory
2146 bail ("ERROR: mode is incompatible with stdout sink")
2149 os.makedirs (outsspec, 0o700)
2150 except FileExistsError:
2151 # if it’s a directory with appropriate perms, everything is
2152 # good; otherwise, below invocation of open(2) will fail
2154 outs = os.open (outsspec, os.O_DIRECTORY, 0o600)
2155 except FileNotFoundError as exn:
2156 bail ("ERROR: cannot create target directory “%s”" % outsspec)
2157 except NotADirectoryError as exn:
2158 bail ("ERROR: target path “%s” is not a directory" % outsspec)
2160 outs = deptdcrypt_mk_stream (PDTCRYPT_SINK, outsspec or "-")
2162 if subcommand == PDTCRYPT_SUB_SCAN:
2164 bail ("ERROR: please supply an input file for scanning")
2166 bail ("ERROR: input must be seekable; please specify a file")
2167 return True, partial (mode_scan, secret, insspec, outs, nacl=nacl)
2169 if subcommand == PDTCRYPT_SUB_SCRYPT:
2170 if secret [0] == PDTCRYPT_SECRET_KEY:
2171 bail ("ERROR: scrypt mode requires a password")
2172 if insspec is not None and nacl is not None \
2173 or insspec is None and nacl is None :
2174 bail ("ERROR: please supply either an input file or "
2179 if insspec is not None or subcommand != PDTCRYPT_SUB_SCRYPT:
2180 ins = deptdcrypt_mk_stream (PDTCRYPT_SOURCE, insspec or "-")
2182 if subcommand == PDTCRYPT_SUB_SCRYPT:
2183 return True, partial (mode_scrypt, secret [1].encode (), ins, nacl,
2186 return True, partial (mode_depdtcrypt, mode, secret, ins, outs)
2190 ok, runner = parse_argv (argv)
2192 if ok is True: return runner ()
if __name__ == "__main__":
    # freestanding invocation: the result of main () becomes the exit status
    status = main (sys.argv)
    sys.exit (status)