ignore GCM tag mismatch in scan mode
[python-delta-tar] / deltatar / crypto.py
CommitLineData
00b3cd10
PG
1#!/usr/bin/env python3
2
3"""
83f2d71e 4Intra2net 2017
00b3cd10
PG
5
6===============================================================================
704ceaa5 7 crypto -- Encryption Layer for the Deltatar Backup
00b3cd10
PG
8===============================================================================
9
10Crypto stack:
11
12 - AES-GCM for the symmetric encryption;
13 - Scrypt as KDF.
14
15References:
16
17 - NIST Recommendation for Block Cipher Modes of Operation: Galois/Counter
18 Mode (GCM) and GMAC
19 http://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf
20
21 - AES-GCM v1:
22 https://cryptome.org/2014/01/aes-gcm-v1.pdf
23
24 - Authentication weaknesses in GCM
25 http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/comments/CWC-GCM/Ferguson2.pdf
26
83f2d71e
PG
27Trouble with python-cryptography packages: authentication tags can only be
28passed in advance: https://github.com/pyca/cryptography/pull/3421
29
6d08915c
PG
30Errors
31-------------------------------------------------------------------------------
32
33Errors fall into roughly three categories:
34
704ceaa5 35 - Cryptographical errors or invalid data.
6d08915c
PG
36
37 - ``InvalidGCMTag`` (decryption failed on account of an invalid GCM
38 tag),
39 - ``InvalidIVFixedPart`` (IV fixed part of object not found in list),
f6cd676f 40 - ``DuplicateIV`` (the IV of an encrypted object already occurred),
704ceaa5
PG
41 - ``DecryptionError`` (used in CLI decryption for presenting error
42 conditions to the user).
6d08915c
PG
43
44 - Incorrect usage of the library.
45
46 - ``InvalidParameter`` (non-conforming user supplied parameter),
47 - ``InvalidHeader`` (data passed for reading not parsable into header),
48 - ``FormatError`` (cannot handle header or parameter version),
49 - ``RuntimeError``.
50
51 - Bad internal state. If one of these is encountered it means that a state
52 was reached that shouldn’t occur during normal processing.
53
54 - ``InternalError``,
55 - ``Unreachable``.
56
57Also, ``EndOfFile`` is used as a sentinel to communicate that a stream supplied
58for reading is exhausted.
59
f6cd676f
PG
60Initialization Vectors
61-------------------------------------------------------------------------------
62
Initialization vectors are checked for reuse during the lifetime of a decryptor.
704ceaa5
PG
64The fixed counters for metadata files cannot be reused and attempts to do so
65will cause a DuplicateIV error. This means the length of objects encrypted with
66a metadata counter is capped at 63 GB.
67
68For ordinary, non-metadata payload, there is an optional mode with strict IV
69checking that causes a crypto context to fail if an IV encountered or created
70was already used for decrypting or encrypting, respectively, an earlier object.
71Note that this mode can trigger false positives when decrypting non-linearly,
72e. g. when traversing the same object multiple times. Since the crypto context
73has no notion of a position in a PDT encrypted archive, this condition must be
74sorted out downstream.
75
76Command Line Utility
77-------------------------------------------------------------------------------
78
79``crypto.py`` may be invoked as a script for decrypting, validating, and
80splitting PDT encrypted files. Consult the usage message for details.
81
82Usage examples:
83
84Decrypt from stdin using the password ‘foo’: ::
85
86 $ crypto.py process foo -i - -o - <some-file.tar.gz.pdtcrypt >some-file.tar.gz
87
88Output verbose information about the encrypted objects in the archive: ::
89
90 $ crypto.py process foo -v -i some-file.tar.gz.pdtcrypt -o /dev/null
91 PDT: decrypt from some-file.tar.gz.pdtcrypt
92 PDT: decrypt to /dev/null
93 PDT: source: file some-file.tar.gz.pdtcrypt
94 PDT: sink: file /dev/null
95 PDT: 0 hdr
96 PDT: · version = 1 : 0100
97 PDT: · paramversion = 1 : 0100
98 PDT: · nacl : d270 b031 00d1 87e2 c946 610d 7b7f 7e5f
99 PDT: · iv : 02ee 3dd7 a963 1eb1 0100 0000
100 PDT: · ctsize = 591 : 4f02 0000 0000 0000
101 PDT: · tag : 5b2d 6d8b 8f82 4842 12fd 0b10 b6e3 369b
102 PDT: 64 decrypt obj no. 1, 591 B
103 PDT: · [64] 0% done, read block (591 B of 591 B remaining)
104 PDT: · decrypt ciphertext 591 B
105 PDT: · decrypt plaintext 591 B
106 PDT: 655 finalize
107
108
109Also, the mode *scrypt* allows deriving encryption keys. To calculate the
110encryption key from the password ‘foo’ and the salt of the first object in a
111PDT encrypted file: ::
112
113 $ crypto.py scrypt foo -i some-file.pdtcrypt
4f6405d6 114 {"paramversion": 1, "salt": "Cqzbk48e3peEjzWto8D0yA==", "key": "JH9EkMwaM4x9F5aim5gK/Q=="}
704ceaa5
PG
115
The computed 16 byte key is given in Base64 encoding in the value to ``key``
and can be fed into Python’s ``base64.b64decode()`` to obtain the
corresponding binary representation.
119
120Note that in Scrypt hashing mode, no data integrity checks are being performed.
121If the wrong password is given, a wrong key will be derived. Whether the password
122was indeed correct can only be determined by decrypting. Note that since PDT
123archives essentially consist of a stream of independent objects, the salt and
124other parameters may change. Thus a key derived using above method from the
125first object doesn’t necessarily apply to any of the subsequent objects.
f6cd676f 126
00b3cd10
PG
127"""
128
7b3940e5 129import base64
00b3cd10 130import binascii
50710d86 131import bisect
00b3cd10
PG
132import ctypes
133import io
c46c8670 134from functools import reduce, partial
f41973a6 135import mmap
00b3cd10
PG
136import os
137import struct
a808459e 138import stat
00b3cd10
PG
139import sys
140import time
da82bc58 141import types
00b3cd10
PG
142try:
143 import enum34
144except ImportError as exn:
145 pass
146
if __name__ == "__main__": ## Work around the import mechanism lest Python’s
    pwd = os.getcwd()      ## preference for local imports causes a cyclical
                           ## import (crypto → pylibscrypt → […] → ./tarfile → crypto).
    ## Drop every search path entry that contains “deltatar”.
    sys.path = [ p for p in sys.path if p.find ("deltatar") < 0 ]
151
152import pylibscrypt
153from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
154from cryptography.hazmat.backends import default_backend
15d3eefd 155import cryptography
00b3cd10
PG
156
157
# Names exported by ``from crypto import *``.
__all__ = [ "hdr_make", "hdr_read", "hdr_fmt", "hdr_fmt_pretty"
          , "scrypt_hashfile"
          , "PDTCRYPT_HDR_SIZE", "AES_GCM_IV_CNT_DATA"
          , "AES_GCM_IV_CNT_INFOFILE", "AES_GCM_IV_CNT_INDEX"
          ]
00b3cd10 163
a393d9cb
PG
164
165###############################################################################
15d3eefd
PG
166## exceptions
167###############################################################################
168
class EndOfFile (Exception):
    """
    Sentinel signalling that a stream supplied for reading is exhausted.

    :param n:   if not ``None``, stored in ``remainder`` (class default: 0)
    :param msg: stored in ``msg`` unconditionally, i. e. also when ``None``
    """
    remainder = 0
    msg       = 0

    def __init__ (self, n=None, msg=None):
        self.msg = msg
        if n is None:
            return # keep the class level default for ``remainder``
        self.remainder = n
15d3eefd 177
b0078f26 178
b12110dd
PG
class InvalidParameter (Exception):
    """A user supplied input is not acceptable for PDT encryption."""
182
b0078f26 183
15d3eefd
PG
class InvalidHeader (Exception):
    """The data at hand could not be interpreted as a valid header."""
187
b0078f26
PG
188
class InvalidGCMTag (Exception):
    """
    Decryption yielded a GCM tag different from the one stored in the header
    of the object.
    """
195
196
class InvalidIVFixedPart (Exception):
    """
    The fixed part of an IV is missing from the supplied list. Either the
    backup is corrupt or the object at hand is not part of it.
    """
203
b0078f26 204
class IVFixedPartError (Exception):
    """
    No unique IV fixed part could be created: the system RNG repeatedly
    returned the byte sequence of the IV used last.
    """
211
212
class InvalidFileCounter (Exception):
    """
    An attempt to reuse one of the dedicated counters (info file, index file)
    was detected while encrypting.
    """
219
220
class DuplicateIV (Exception):
    """
    While encrypting, the current IV turned out to equal one that already
    exists (same fixed part and file counter). Unrecoverable; caused by
    tampering or a programmer error.
    """
228
229
class NonConsecutiveIV (Exception):
    """
    The IVs encountered are not numbered consecutively. With strict IV
    checking this is a hard error, which rules out random access to the
    encrypted objects.
    """
236
237
b12110dd
PG
class FormatError (Exception):
    """The parameters found in a header cannot be used."""
241
b0078f26 242
class DecryptionError (Exception):
    """Decryption failed when ``crypto.py`` was run from the command line."""
246
b0078f26 247
class Unreachable (Exception):
    """
    Stand-in for __builtin_unreachable(); whenever this is thrown, it is a
    programmer error.
    """
254
b0078f26 255
b12110dd
PG
class InternalError (Exception):
    """Error attributable neither to bad user inputs nor to cryptography."""
259
15d3eefd
PG
260
261###############################################################################
a393d9cb
PG
262## crypto layer version
263###############################################################################
264
# Table of known parameter versions. Each entry names the key derivation
# function together with its argument (``"kdf"``) and the cipher (``"enc"``).
ENCRYPTION_PARAMETERS = \
    { 0: \
        { "kdf": ("dummy", 16)          # argument is the key length
        , "enc": "passthrough" }
    , 1: \
        { "kdf": ( "scrypt"
                 , { "dkLen"    : 16
                   , "N"        : 1 << 16
                   , "r"        : 8
                   , "p"        : 1
                   , "NaCl_LEN" : 16 }) # size of a freshly generated salt
        , "enc": "aes-gcm" } }
a393d9cb 277
00b3cd10
PG
278###############################################################################
279## constants
280###############################################################################
281
PDTCRYPT_HDR_MAGIC = b"PDTCRYPT"

# Sizes of the individual header fields in bytes. The trailing comment on
# each line is the running total, i. e. the offset at which that field ends.
PDTCRYPT_HDR_SIZE_MAGIC          = 8     #  8
PDTCRYPT_HDR_SIZE_VERSION        = 2     # 10
PDTCRYPT_HDR_SIZE_PARAMVERSION   = 2     # 12
PDTCRYPT_HDR_SIZE_NACL           = 16    # 28
PDTCRYPT_HDR_SIZE_IV             = 12    # 40
PDTCRYPT_HDR_SIZE_CTSIZE         = 8     # 48
PDTCRYPT_HDR_SIZE_TAG            = 16    # 64 GCM auth tag

# total size of a header
PDTCRYPT_HDR_SIZE = PDTCRYPT_HDR_SIZE_MAGIC        + PDTCRYPT_HDR_SIZE_VERSION \
                  + PDTCRYPT_HDR_SIZE_PARAMVERSION + PDTCRYPT_HDR_SIZE_NACL    \
                  + PDTCRYPT_HDR_SIZE_IV           + PDTCRYPT_HDR_SIZE_CTSIZE  \
                  + PDTCRYPT_HDR_SIZE_TAG # = 64
00b3cd10
PG
296
# precalculate offsets since Python can’t do constant folding over names;
# each offset is that of the preceding field plus the size of that field
HDR_OFF_VERSION      = PDTCRYPT_HDR_SIZE_MAGIC
HDR_OFF_PARAMVERSION = HDR_OFF_VERSION      + PDTCRYPT_HDR_SIZE_VERSION
HDR_OFF_NACL         = HDR_OFF_PARAMVERSION + PDTCRYPT_HDR_SIZE_PARAMVERSION
HDR_OFF_IV           = HDR_OFF_NACL         + PDTCRYPT_HDR_SIZE_NACL
HDR_OFF_CTSIZE       = HDR_OFF_IV           + PDTCRYPT_HDR_SIZE_IV
HDR_OFF_TAG          = HDR_OFF_CTSIZE       + PDTCRYPT_HDR_SIZE_CTSIZE
00b3cd10
PG
304
# ``struct`` format strings; the leading “<” selects little-endian byte order
# with standard sizes and no alignment padding
FMT_UINT16_LE = "<H"
FMT_UINT64_LE = "<Q"
FMT_I2N_IV    = "<8sL"   # 8 random bytes ‖ 32 bit counter
FMT_I2N_HDR   = ("<"     # little-endian (not host byte order)
                 "8s"    # magic
                 "H"     # version
                 "H"     # paramversion
                 "16s"   # sodium chloride
                 "12s"   # iv
                 "Q"     # size
                 "16s")  # GCM tag
00b3cd10
PG
316
# aes+gcm
AES_GCM_MAX_SIZE              = (1 << 36) - (1 << 5) # 2^39 - 2^8 b ≅ 64 GB
PDTCRYPT_MAX_OBJ_SIZE_DEFAULT = 63 * (1 << 30)       #                63 GB
# starts out as the default; kept under a separate name
PDTCRYPT_MAX_OBJ_SIZE         = PDTCRYPT_MAX_OBJ_SIZE_DEFAULT
00b3cd10 321
3031b7ae
PG
# index and info files are written on the fly while encrypting so their
# counters must be available in advance
AES_GCM_IV_CNT_INFOFILE     = 1 # constant
AES_GCM_IV_CNT_INDEX        = AES_GCM_IV_CNT_INFOFILE + 1
AES_GCM_IV_CNT_DATA         = AES_GCM_IV_CNT_INDEX    + 1 # also for multivolume
AES_GCM_IV_CNT_MAX_DEFAULT  = 0xffFFffFF # largest value of the 32 bit counter
AES_GCM_IV_CNT_MAX          = AES_GCM_IV_CNT_MAX_DEFAULT
2d6fd8c8 329
be124bca
PG
# IV structure and generation; fixed part and counter together make up the
# 12 bytes of the IV header field
PDTCRYPT_IV_GEN_MAX_RETRIES = 10 # × (number of attempts)
PDTCRYPT_IV_FIXEDPART_SIZE  =  8 # B
PDTCRYPT_IV_COUNTER_SIZE    =  4 # B
39accaaa 334
00b3cd10 335###############################################################################
39accaaa 336## header, trailer
00b3cd10
PG
337###############################################################################
338#
339# Interface:
340#
341# struct hdrinfo
342# { version : u16
343# , paramversion : u16
344# , nacl : [u8; 16]
345# , iv : [u8; 12]
704ceaa5
PG
346# , ctsize : usize
347# , tag : [u8; 16] }
83f2d71e 348#
00b3cd10 349# fn hdr_read (f : handle) -> hdrinfo;
c2d1c3ec 350# fn hdr_make (f : handle, h : hdrinfo) -> IOResult<usize>;
00b3cd10
PG
351# fn hdr_fmt (h : hdrinfo) -> String;
352#
353
def hdr_read (data):
    """
    Interpret a block of bytes as a PDTCRYPT header.

    :param data: the raw header, laid out according to ``FMT_I2N_HDR``
    :returns:    dict with the keys ``version``, ``paramversion``, ``nacl``,
                 ``iv``, ``ctsize``, and ``tag``; the magic is not included
    :raises InvalidHeader: if unpacking fails or the magic bytes differ from
                 ``PDTCRYPT_HDR_MAGIC``
    """
    try:
        fields = struct.unpack (FMT_I2N_HDR, data)
    except Exception as exn:
        raise InvalidHeader ("error unpacking header from [%r]: %s"
                             % (binascii.hexlify (data), str (exn)))

    magic = fields [0]
    if magic != PDTCRYPT_HDR_MAGIC:
        raise InvalidHeader ("bad magic in header: expected [%s], got [%s]"
                             % (PDTCRYPT_HDR_MAGIC, magic))

    names = ("version", "paramversion", "nacl", "iv", "ctsize", "tag")
    return dict (zip (names, fields [1:]))
381
382
def hdr_read_stream (instr):
    """
    Read one header from *instr*, starting at its current position.

    :param instr: object with a ``.read (n)`` method returning bytes
    :returns:     the parsed header as returned by ``hdr_read ()``
    :raises EndOfFile:     if nothing at all could be read
    :raises InvalidHeader: if fewer bytes than a complete header were read,
                           or if the bytes are not parsable as a header
    """
    raw   = instr.read (PDTCRYPT_HDR_SIZE)
    nread = len (raw)

    if nread == 0:
        raise EndOfFile
    if nread == PDTCRYPT_HDR_SIZE:
        return hdr_read (raw)

    raise InvalidHeader ("hdr_read_stream: expected %d B, received %d B"
                         % (PDTCRYPT_HDR_SIZE, nread))
39accaaa
PG
398
399
def hdr_from_params (version, paramversion, nacl, iv, ctsize, tag):
    """
    Pack the given values, preceded by the magic, into a PDTCRYPT header.

    :type version:      int to fit   uint16_t
    :type paramversion: int to fit   uint16_t
    :type nacl:         bytes to fit uint8_t[16]
    :type iv:           bytes to fit uint8_t[12]
    :type ctsize:       int to fit   uint64_t
    :type tag:          bytes to fit uint8_t[16]
    :returns: ``(True, header bytes)`` on success, ``(False, error message)``
              if packing failed; no exception is propagated
    """
    fields = ( PDTCRYPT_HDR_MAGIC
             , version, paramversion, nacl, iv, ctsize, tag)
    hdrbuf = bytearray (PDTCRYPT_HDR_SIZE)

    try:
        struct.pack_into (FMT_I2N_HDR, memoryview (hdrbuf), 0, *fields)
    except Exception as exn:
        return False, "error assembling header: %s" % str (exn)
    else:
        return True, bytes (hdrbuf)
00b3cd10 422
00b3cd10 423
8a990744
PG
def hdr_make_dummy (s):
    """
    Return a block of header size in which every byte has the same value,
    namely the sum of the code points of *s* modulo 0xFF. Serves to check
    that the position jumped back to is indeed that of the object header.
    """
    fill = sum (ord (ch) for ch in s) % 0xFF
    return bytes ([ fill ]) * PDTCRYPT_HDR_SIZE
8a990744
PG
432
433
def hdr_make (hdr):
    """
    Build the binary header from a header structure (a dict as returned by
    ``hdr_read ()``). Missing keys are passed on as ``None``. The result is
    that of ``hdr_from_params ()``.
    """
    names = ("version", "paramversion", "nacl", "iv", "ctsize", "tag")
    return hdr_from_params (**{ name: hdr.get (name) for name in names })
a393d9cb
PG
442
443
HDR_FMT = "I2n_header { version: %d, paramversion: %d, nacl: %s[%d]," \
          " iv: %s[%d], ctsize: %d, tag: %s[%d] }"

def hdr_fmt (h):
    """
    Render a header structure on a single line. Byte fields are shown
    hexlified, followed by their length in brackets.
    """
    def hex_and_len (name):
        raw = h [name]
        return binascii.hexlify (raw), len (raw)

    return HDR_FMT % ( (h ["version"], h ["paramversion"])
                     + hex_and_len ("nacl")
                     + hex_and_len ("iv")
                     + (h ["ctsize"],)
                     + hex_and_len ("tag"))
00b3cd10 454
00b3cd10 455
def hex_spaced_of_bytes (b):
    """
    Format bytes object, hexdump style: two bytes per group, groups
    separated by a single space.

    :param b: bytes-like object
    :returns: str; for odd lengths the last group consists of the single
              trailing byte. An empty input yields the empty string and a
              one byte input yields just that byte, without leading space.
    """
    groups = [ "%.2x%.2x" % pair for pair in zip (b [0::2], b [1::2]) ]
    if len (b) % 2 == 1: # odd lengths: ``zip`` dropped the last byte
        groups.append ("%.2x" % b [-1])
    return " ".join (groups)
00b3cd10 461
591a722f 462
3031b7ae
PG
def hdr_iv_counter (h):
    """Return the counter, i. e. the variable part, of the IV in header *h*."""
    return struct.unpack (FMT_I2N_IV, h ["iv"]) [1]
467
468
def hdr_iv_fixed (h):
    """Return the fixed part (leading eight bytes) of the IV in header *h*."""
    return struct.unpack (FMT_I2N_IV, h ["iv"]) [0]
473
474
# ``hdr_dump`` is another name for ``hex_spaced_of_bytes``.
hdr_dump = hex_spaced_of_bytes
00b3cd10 476
00b3cd10 477
15d3eefd
PG
478HDR_FMT_PRETTY = \
479"""version = %-4d : %s
480paramversion = %-4d : %s
481nacl : %s
482iv : %s
483ctsize = %-20d : %s
484tag : %s
83f2d71e 485"""
00b3cd10 486
def hdr_fmt_pretty (h):
    """
    Render a header structure over multiple lines, one per field, showing
    the value of numeric fields along with the raw little-endian bytes of
    every field. (The “PDTCRYPT” magic that precedes every header is
    implied and thus left out.)
    """
    dump = hex_spaced_of_bytes

    def raw_of (fmt, value):
        return dump (struct.pack (fmt, value))

    fields = ( h ["version"]     , raw_of (FMT_UINT16_LE, h ["version"])
             , h ["paramversion"], raw_of (FMT_UINT16_LE, h ["paramversion"])
             , dump (h ["nacl"])
             , dump (h ["iv"])
             , h ["ctsize"]      , raw_of (FMT_UINT64_LE, h ["ctsize"])
             , dump (h ["tag"]))
    return HDR_FMT_PRETTY % fields
00b3cd10 503
f6cd676f
PG
IV_FMT = "((f %s) (c %d))"

def iv_fmt (iv):
    """
    Render an IV as its two components: the hexlified fixed part (``f``)
    and the counter (``c``).
    """
    fixed_part, counter = struct.unpack (FMT_I2N_IV, iv)
    fixed_hex = binascii.hexlify (fixed_part)
    return IV_FMT % (fixed_hex, counter)
510
00b3cd10 511
00b3cd10 512###############################################################################
f41973a6
PG
513## restoration
514###############################################################################
515
class Location (object):
    """Plain record with the fields ``n`` and ``offset``, zero by default."""
    offset = 0
    n      = 0
519
def restore_loc_fmt (loc):
    """Format the ``n`` and ``offset`` attributes of *loc* for display."""
    fields = (loc.n, loc.offset)
    return "%d off:%d" % fields
523
def locate_hdr_candidates (fd):
    """
    Walk over instances of the magic string in the payload, collecting their
    positions. If the offset of the first found instance is not zero, the file
    begins with leading garbage.

    :param fd: descriptor of the file to scan, open for reading
    :return:   The list of offsets in the file, in ascending order; empty if
               the magic does not occur or the file has no content.
    """
    if os.fstat (fd).st_size == 0:
        return [] # an empty file cannot be mapped: mmap() raises ValueError

    cands = []
    mm = mmap.mmap (fd, 0, mmap.MAP_SHARED, mmap.PROT_READ)
    try:
        pos = mm.find (PDTCRYPT_HDR_MAGIC)
        while pos != -1:
            cands.append (pos)
            # resume one byte further so that every instance is reported
            pos = mm.find (PDTCRYPT_HDR_MAGIC, pos + 1)
    finally:
        mm.close () # release the mapping also if the search fails

    return cands
544
545
6c8073ab
PG
# verdicts returned by inspect_hdr ()
HDR_CAND_GOOD  = 0 # header marks begin of valid object
HDR_CAND_FISHY = 1 # inconclusive (tag mismatch, obj overlap etc.)
HDR_CAND_JUNK  = 2 # not a header / object unreadable
549
550
def inspect_hdr (fd, off):
    """
    Try and parse a header located at offset *off* of the descriptor *fd*.

    :returns: pair of a verdict and the parsed header:

              - ``HDR_CAND_JUNK, None`` if the offset cannot be reached, the
                header is incomplete, or it does not parse;
              - ``HDR_CAND_FISHY, hdr`` if the object extends past the end of
                the file; ``ctsize`` in *hdr* is then cut down to the number
                of bytes actually present;
              - ``HDR_CAND_GOOD, hdr`` otherwise.
    """
    verbose = PDTCRYPT_VERBOSE is True

    os.lseek (fd, off, os.SEEK_SET)
    if os.lseek (fd, 0, os.SEEK_CUR) != off:
        if verbose:
            noise ("PDT: %d → dismissed (lseek() past EOF)" % off)
        return HDR_CAND_JUNK, None

    raw = os.read (fd, PDTCRYPT_HDR_SIZE)
    if len (raw) != PDTCRYPT_HDR_SIZE:
        if verbose:
            noise ("PDT: %d → dismissed (EOF inside header)" % off)
        return HDR_CAND_JUNK, None

    try:
        hdr = hdr_read (raw)
    except InvalidHeader as exn:
        if verbose:
            noise ("PDT: %d → dismissed (invalid: [%s])" % (off, str (exn)))
        return HDR_CAND_JUNK, None

    # extent of the ciphertext according to the header
    first = off + PDTCRYPT_HDR_SIZE
    last  = first + hdr ["ctsize"]

    fsize = os.lseek (fd, 0, os.SEEK_END)
    if last <= fsize:
        return HDR_CAND_GOOD, hdr

    if verbose:
        noise ("PDT: %d → EOF inside object (%d≤%d≤%d); adjusting size to "
               "%d" % (off, first, fsize, last, (fsize - first)))
    # try reading up to the end
    hdr ["ctsize"] = fsize - first
    return HDR_CAND_FISHY, hdr
592
593
def try_decrypt (ifd, off, hdr, secret, ofd=-1):
    """
    Attempt to decrypt the object in the (seekable) descriptor *ifd* starting
    at *off* using the metadata in *hdr* and *secret*. An output fd can be
    specified with *ofd*; if it is *-1* – the default –, the decrypted payload
    will be discarded.

    Always creates a fresh decryptor, so validation steps across objects don’t
    apply.

    Errors during GCM tag validation are ignored: they are reported via
    ``noise ()`` and nothing further is written for the object.

    :param secret: pair of the kind of secret (``PDTCRYPT_SECRET_PW`` or
                   ``PDTCRYPT_SECRET_KEY``) and its value; a key is expected
                   in hexadecimal notation
    :returns:      the number of ciphertext bytes processed
    :raises RuntimeError: if the kind of secret is not known; other errors
                   during decryption are reported and passed on
    """
    ctleft = hdr ["ctsize"]
    pos    = off

    ks = secret [0]
    if ks == PDTCRYPT_SECRET_PW:
        decr = Decrypt (password=secret [1])
    elif ks == PDTCRYPT_SECRET_KEY:
        key = binascii.unhexlify (secret [1])
        decr = Decrypt (key=key)
    else:
        raise RuntimeError ("unknown kind of secret %r" % (ks,))

    decr.next (hdr)

    try:
        os.lseek (ifd, pos, os.SEEK_SET)
        while ctleft > 0:
            cnksiz  = min (ctleft, PDTCRYPT_BLOCKSIZE)
            cnk     = os.read (ifd, cnksiz)
            ctleft -= cnksiz
            pos    += cnksiz
            pt      = decr.process (cnk)
            if ofd != -1:
                os.write (ofd, pt)
        try:
            tail = decr.done ()
        except InvalidGCMTag:
            # No final plaintext in this case. In particular the plaintext
            # of the last chunk must not be written a second time.
            noise ("PDT: GCM tag mismatch for object %d–%d"
                   % (off, off + hdr ["ctsize"]))
        else:
            if len (tail) > 0 and ofd != -1:
                os.write (ofd, tail)

    except Exception as exn:
        noise ("PDT: error decrypting object %d–%d@%d, %d B remaining [%s]"
               % (off, off + hdr ["ctsize"], pos, ctleft, exn))
        raise

    return pos - off
6c8073ab
PG
644
645
6690f5e0
PG
def readable_objects_offsets (ifd, secret, cands):
    """
    Filter the candidate offsets *cands*, keeping those at which a header
    was found whose object could be decrypted over its entire size.

    :returns: list of the offsets retained, in the order of *cands*
    """
    readable = []
    for cand in cands:
        verdict, hdr = inspect_hdr (ifd, cand)
        if verdict not in [HDR_CAND_GOOD, HDR_CAND_FISHY]:
            continue # ignore unreadable ones
        nread = try_decrypt (ifd, cand + PDTCRYPT_HDR_SIZE, hdr, secret)
        if nread == hdr ["ctsize"]:
            readable.append (cand)
    return readable
664
665
def reconstruct_offsets (fname, secret):
    """
    Open the file *fname* read-only and return the offsets of the objects in
    it that are readable with *secret*. The descriptor is closed again in
    any case.
    """
    ifd = os.open (fname, os.O_RDONLY)
    try:
        return readable_objects_offsets (ifd, secret,
                                         locate_hdr_candidates (ifd))
    finally:
        os.close (ifd)
674
675
f41973a6 676###############################################################################
6178061e
PG
677## passthrough / null encryption
678###############################################################################
679
class PassthroughCipher (object):
    """
    Null cipher: data is handed back unchanged, finalizing yields no further
    bytes, and the tag consists of sixteen zero bytes.
    """

    tag = struct.pack ("<QQ", 0, 0)

    def update (self, b):
        """Return *b* as is."""
        return b

    def finalize (self):
        """Return the empty bytes object."""
        return b""

    def finalize_with_tag (self, _):
        """Return the empty bytes object; the argument is not looked at."""
        return b""
691
692###############################################################################
a393d9cb 693## convenience wrapper
00b3cd10
PG
694###############################################################################
695
c46c8670
PG
696
def kdf_dummy (klen, password, _nacl):
    """
    Fake KDF for testing purposes that is called when parameter version zero is
    encountered. The “key” is the password repeated, and cut off, to a length
    of exactly *klen* bytes.

    :param klen:     length of the key in bytes
    :param password: str or bytes; a str is encoded first so that the key
                     length is correct for non-ASCII passwords as well
    :param _nacl:    ignored
    :returns:        pair of the key and an empty salt
    """
    if isinstance (password, bytes) is False:
        password = password.encode ()
    # measure only after encoding: characters may take more than one byte
    q, r = divmod (klen, len (password))
    return password * q + password [:r], b""
706
707
708SCRYPT_KEY_MEMO = { } # static because needed for both the info file and the archive
709
710
def kdf_scrypt (params, password, nacl):
    """
    Derive a key by means of Scrypt; this is the KDF of parameter version
    one. Results are kept in ``SCRYPT_KEY_MEMO``, indexed by all the inputs,
    so that the computation runs only once when multiple encryption contexts
    are created.

    :param params: dict with the keys ``N``, ``r``, ``p``, ``dkLen``, and
                   ``NaCl_LEN``
    :param nacl:   the salt; if ``None``, ``NaCl_LEN`` random bytes are used
    :returns:      pair of the key and the salt that was used
    """
    cost, blksiz, paral = params ["N"], params ["r"], params ["p"]
    keylen = params ["dkLen"]

    if nacl is None:
        nacl = os.urandom (params ["NaCl_LEN"])

    memo_key = (password, nacl, cost, blksiz, paral, keylen)
    if memo_key in SCRYPT_KEY_MEMO:
        return SCRYPT_KEY_MEMO [memo_key], nacl

    derived = pylibscrypt.scrypt (password, nacl, cost, blksiz, paral, keylen)
    SCRYPT_KEY_MEMO [memo_key] = derived
    return derived, nacl
a64085a8
PG
731
732
def kdf_by_version (paramversion=None, defs=None):
    """
    Select the KDF handler for a parameter version or, if no version is
    given, for the definition set passed in *defs*.

    :rtype: function (password, nacl) -> (key, nacl)
    :raises InvalidParameter: if there is no definition set to work with
    :raises ValueError:       if the definition set names an unknown KDF
    """
    if paramversion is not None:
        defs = ENCRYPTION_PARAMETERS.get (paramversion, None)
    if defs is None:
        raise InvalidParameter ("no encryption parameters for version %r"
                                % paramversion)

    kdf, params = defs ["kdf"]
    if kdf == "dummy":
        handler = kdf_dummy
    elif kdf == "scrypt":
        handler = kdf_scrypt
    else:
        raise ValueError ("key derivation method %r unknown" % kdf)

    # the handler receives the parameters as its first argument
    return partial (handler, params)
a64085a8
PG
752
753
b360b772
PG
754###############################################################################
755## SCRYPT hashing
756###############################################################################
757
def scrypt_hashsource (pw, ins):
    """
    Calculate the SCRYPT hash from the password and the information contained
    in the first header found in ``ins``.

    This does not validate whether the first object is encrypted correctly.

    :param pw:  the password, str or bytes
    :param ins: the input, an ``io.BufferedReader`` or ``io.FileIO``
    :returns:   the tuple *(hash, salt, version, paramversion)*, or the
                integer 1 if the input could not be used (no header, unknown
                parameter version, not an SCRYPT archive); a message is then
                emitted through ``noise ()``
    :raises InvalidParameter: if the type of *pw* or *ins* is not acceptable
    """
    if isinstance (pw, str) is True:
        pw = str.encode (pw)
    elif isinstance (pw, bytes) is False:
        raise InvalidParameter ("password must be a string, not %s"
                                % type (pw))
    if isinstance (ins, io.BufferedReader) is False and \
            isinstance (ins, io.FileIO) is False:
        raise InvalidParameter ("file to hash must be opened in “binary” mode")
    hdr = None
    try:
        hdr = hdr_read_stream (ins)
    except EndOfFile as exn:
        noise ("PDT: malformed input: end of file reading first object header")
        noise ("PDT:")
        return 1

    nacl = hdr ["nacl"]
    pver = hdr ["paramversion"]
    if PDTCRYPT_VERBOSE is True:
        noise ("PDT: salt of first object : %s" % binascii.hexlify (nacl))
        noise ("PDT: parameter version of archive : %d" % pver)

    defs = ENCRYPTION_PARAMETERS.get (pver, None)
    if defs is None: # would otherwise fail below when subscripting ``None``
        noise ("PDT: object has unknown parameter version %d" % pver)
        return 1

    kdfname, params = defs ["kdf"]
    if kdfname != "scrypt":
        noise ("PDT: input is not an SCRYPT archive")
        noise ("")
        return 1

    try:
        kdf = kdf_by_version (None, defs)
    except ValueError as exn:
        noise ("PDT: object has unknown parameter version %d" % pver)
        return 1 # there is no KDF to continue with

    hsh, _void = kdf (pw, nacl)

    return hsh, nacl, hdr ["version"], pver
b360b772
PG
801
802
def scrypt_hashfile (pw, fname):
    """
    Calculate the SCRYPT hash from the password *pw* and the first header of
    the file *fname*; only offset zero is considered for the header. If
    *fname* is empty or ``None``, “-” is passed on as the name of the source.
    """
    source = fname or "-"
    with deptdcrypt_mk_stream (PDTCRYPT_SOURCE, source) as ins:
        hsh, _nacl, _version, _pver = scrypt_hashsource (pw, ins)
        return hsh
812
813
814###############################################################################
815## AES-GCM context
816###############################################################################
817
a393d9cb
PG
818class Crypto (object):
819 """
820 Encryption context to remain alive throughout an entire tarfile pass.
821 """
6178061e 822 enc = None
a393d9cb
PG
823 nacl = None
824 key = None
50710d86
PG
825 cnt = None # file counter (uint32_t != 0)
826 iv = None # current IV
30019abf
PG
827 fixed = None # accu for 64 bit fixed parts of IV
828 used_ivs = None # tracks IVs
829 strict_ivs = False # if True, panic on duplicate object IV
48db09ba
PG
830 password = None
831 paramversion = None
633b18a9
PG
832 stats = { "in" : 0
833 , "out" : 0
834 , "obj" : 0 }
fa47412e 835
fa47412e
PG
836 ctsize = -1
837 ptsize = -1
3031b7ae
PG
838 info_counter_used = False
839 index_counter_used = False
a393d9cb 840
a64085a8 841 def __init__ (self, *al, **akv):
30019abf 842 self.used_ivs = set ()
a64085a8 843 self.set_parameters (*al, **akv)
39accaaa
PG
844
845
704ceaa5 846 def next_fixed (self):
be124bca 847 # NOP for decryption
50710d86
PG
848 pass
849
850
851 def set_object_counter (self, cnt=None):
704ceaa5
PG
852 """
853 Safely set the internal counter of encrypted objects. Numerous
854 constraints apply:
855
856 The same counter may not be reused in combination with one IV fixed
857 part. This is validated elsewhere in the IV handling.
858
859 Counter zero is invalid. The first two counters are reserved for
860 metadata. The implementation does not allow for splitting metadata
861 files over multiple encrypted objects. (This would be possible by
862 assigning new fixed parts.) Thus in a Deltatar backup there is at most
863 one object with a counter value of one and two. On creation of a
864 context, the initial counter may be chosen. The globals
865 ``AES_GCM_IV_CNT_INFOFILE`` and ``AES_GCM_IV_CNT_INDEX`` can be used to
866 request one of the reserved values. If one of these values has been
867 used, any further attempt of setting the counter to that value will
868 be rejected with an ``InvalidFileCounter`` exception.
869
870 Out of bounds values (i. e. below one and more than the maximum of 2³²)
871 cause an ``InvalidParameter`` exception to be thrown.
872 """
50710d86
PG
873 if cnt is None:
874 self.cnt = AES_GCM_IV_CNT_DATA
875 return
876 if cnt == 0 or cnt > AES_GCM_IV_CNT_MAX + 1:
b12110dd
PG
877 raise InvalidParameter ("invalid counter value %d requested: "
878 "acceptable values are from 1 to %d"
879 % (cnt, AES_GCM_IV_CNT_MAX))
50710d86
PG
880 if cnt == AES_GCM_IV_CNT_INFOFILE:
881 if self.info_counter_used is True:
fac2cfe1
PG
882 raise InvalidFileCounter ("attempted to reuse info file "
883 "counter %d: must be unique" % cnt)
50710d86 884 self.info_counter_used = True
3031b7ae
PG
885 elif cnt == AES_GCM_IV_CNT_INDEX:
886 if self.index_counter_used is True:
fac2cfe1
PG
887 raise InvalidFileCounter ("attempted to reuse index file "
888 " counter %d: must be unique" % cnt)
3031b7ae 889 self.index_counter_used = True
50710d86
PG
890 if cnt <= AES_GCM_IV_CNT_MAX:
891 self.cnt = cnt
892 return
893 # cnt == AES_GCM_IV_CNT_MAX + 1 → wrap
894 self.cnt = AES_GCM_IV_CNT_DATA
704ceaa5 895 self.next_fixed ()
50710d86
PG
896
897
def set_parameters (self, password=None, key=None, paramversion=None,
                    nacl=None, counter=None, strict_ivs=False):
    """
    Configure the internal state of a crypto context. Not intended for
    external use.

    A new IV fixed part is requested and the object counter is set before
    anything else. A ``key`` takes precedence over a ``password``: it is
    stored together with the salt as is. A password is converted to bytes
    if necessary; the key is derived from it unless both the parameter
    version and the salt are still unknown.
    """
    self.next_fixed ()
    self.set_object_counter (counter)
    self.strict_ivs = strict_ivs

    if paramversion is not None:
        self.paramversion = paramversion

    if key is not None:
        # ready-made key: nothing to derive
        self.key  = key
        self.nacl = nacl
        return

    if password is None:
        return

    if not isinstance (password, bytes):
        password = str.encode (password)
    self.password = password

    if paramversion is None and nacl is None:
        # postpone key setup until first header is available
        return

    derive = kdf_by_version (paramversion)
    if derive is None:
        return
    self.key, self.nacl = derive (password, nacl)
fa47412e 925
39accaaa 926
def process (self, buf):
    """
    Feed a buffer to the ``.update()`` method of the wrapped encryptor or
    decryptor and return its output. The byte counts of input and output
    are added to the context statistics.

    Raises ``RuntimeError`` if no cipher is active. The Cryptography
    exception ``AlreadyFinalized`` is translated to an ``InternalError``
    at this point. It may occur in sound code when the GC closes an
    encrypting stream after an error. Everywhere else it must be treated
    as a bug.
    """
    cipher = self.enc
    if cipher is None:
        raise RuntimeError ("process: context not initialized")

    stats = self.stats
    stats ["in"] += len (buf)
    try:
        result = cipher.update (buf)
    except cryptography.exceptions.AlreadyFinalized as exn:
        raise InternalError (exn)
    stats ["out"] += len (result)

    return result
39accaaa
PG
946
947
def next (self, password, paramversion, nacl, iv):
    """
    Get the context ready for another object: the per-object size counters
    are zeroed, the object count is incremented and the IV is run through
    the duplicate check (which errors out if strict checking was
    requested). The context is reconfigured only if the password, the
    parameter version or the salt differ from the values currently set.
    """
    self.ctsize = 0
    self.ptsize = 0
    self.stats ["obj"] += 1

    self.check_duplicate_iv (iv)

    unchanged = self.paramversion == paramversion \
            and self.password     == password     \
            and self.nacl         == nacl
    if unchanged:
        return

    self.set_parameters (password=password, paramversion=paramversion,
                         nacl=nacl, strict_ivs=self.strict_ivs)
966
967
def check_duplicate_iv (self, iv):
    """
    Record an IV (the 12 byte representation as in the header) in the set of
    IVs seen by this context. If strict checking is enabled and the IV is
    already present, a ``DuplicateIV`` is raised instead. Depending on the
    context, this may indicate a serious error (IV reuse).
    """
    if self.strict_ivs is True:
        if iv in self.used_ivs:
            raise DuplicateIV ("iv %s was reused" % iv_fmt (iv))
    # iv is either new or duplicates are tolerated; remember it
    self.used_ivs.add (iv)
fa47412e
PG
978
979
def counters (self):
    """
    Return the statistics of the context as a triple: number of objects,
    bytes passed in, bytes passed out.
    """
    stats = self.stats
    return stats ["obj"], stats ["in"], stats ["out"]
985
986
8de91f4f
PG
def drop (self):
    """
    Discard the active encryptor / decryptor without finalizing it. The
    context must be set up again with ``.next()`` before it can process
    further data.
    """
    self.enc = None
993
994
39accaaa
PG
class Encrypt (Crypto):
    """
    Encryption context. Each object is encrypted under its own IV that is
    assembled from the current random fixed part and the object counter.
    """

    lastinfo = None # (filename, placeholder header) of the current object
    version  = None # format version handed to the header constructor
    paramenc = None # cipher name selected through the parameter version

    def __init__ (self, version, paramversion, password=None, key=None, nacl=None,
                  counter=AES_GCM_IV_CNT_DATA, strict_ivs=True):
        """
        The ctor will throw immediately if one of the parameters does not conform
        to our expectations.

        :type      version: int to fit uint16_t
        :type paramversion: int to fit uint16_t
        :param    password: mutually exclusive with ``key``
        :type     password: str
        :param         key: mutually exclusive with ``password``; must be
                            accompanied by the salt (``nacl``)
        :type          key: bytes
        :type         nacl: bytes
        :type      counter: initial object counter the values
                            ``AES_GCM_IV_CNT_INFOFILE`` and
                            ``AES_GCM_IV_CNT_INDEX`` are unique in each backup set
                            and cannot be reused even with different fixed parts.
        :type   strict_ivs: bool
        :raises InvalidParameter: if an argument has the wrong type or
                            value, or if the parameter version is unknown.
        """
        # exactly one of password and key must be given
        if (password is None) == (key is None):
            raise InvalidParameter ("__init__: need either key or password")

        if key is not None:
            if isinstance (key, bytes) is False:
                raise InvalidParameter ("__init__: key must be provided as "
                                        "bytes, not %s" % type (key))
            if nacl is None:
                raise InvalidParameter ("__init__: salt must be provided along "
                                        "with encryption key")
        else: # password, no key
            if isinstance (password, str) is False:
                raise InvalidParameter ("__init__: password must be a string, not %s"
                                        % type (password))
            if len (password) == 0:
                raise InvalidParameter ("__init__: supplied empty password but not "
                                        "permitted for PDT encrypted files")
        # version
        if isinstance (version, int) is False:
            raise InvalidParameter ("__init__: version number must be an "
                                    "integer, not %s" % type (version))
        if version < 0:
            raise InvalidParameter ("__init__: version number must be a "
                                    "nonnegative integer, not %d" % version)
        # paramversion
        if isinstance (paramversion, int) is False:
            raise InvalidParameter ("__init__: crypto parameter version number "
                                    "must be an integer, not %s"
                                    % type (paramversion))
        if paramversion < 0:
            raise InvalidParameter ("__init__: crypto parameter version number "
                                    "must be a nonnegative integer, not %d"
                                    % paramversion)
        # salt
        if nacl is not None:
            if isinstance (nacl, bytes) is False:
                raise InvalidParameter ("__init__: salt given, but of type %s "
                                        "instead of bytes" % type (nacl))
            # salt length would depend on the actual encryption so it can’t be
            # validated at this point

        # reject unknown parameter versions with a proper error instead of
        # failing on the subscript of ``None``
        defs = ENCRYPTION_PARAMETERS.get (paramversion)
        if defs is None:
            raise InvalidParameter ("__init__: crypto parameter version %d "
                                    "not known" % paramversion)

        self.fixed    = [ ]
        self.version  = version
        self.paramenc = defs ["enc"]

        super().__init__ (password, key, paramversion, nacl, counter=counter,
                          strict_ivs=strict_ivs)


    def next_fixed (self, retries=PDTCRYPT_IV_GEN_MAX_RETRIES):
        """
        Generate the next IV fixed part by requesting
        ``PDTCRYPT_IV_FIXEDPART_SIZE`` bytes from ``os.urandom()``. The
        buffer so obtained is tested against the fixed parts used so far to
        prevent accidental reuse of IVs. After ``retries`` failed attempts to
        create a unique fixed part, it will refuse to continue with an
        ``IVFixedPartError``. This is unlikely to ever happen on a normal
        system but may detect an issue with the random generator.

        The list of fixed parts that were used by the context at hand can be
        accessed through the ``.fixed`` list. Its last element is the fixed
        part currently in use.
        """
        i = 0
        while i < retries:
            fp = os.urandom (PDTCRYPT_IV_FIXEDPART_SIZE)
            if fp not in self.fixed:
                self.fixed.append (fp)
                return
            i += 1
        raise IVFixedPartError ("error obtaining a unique IV fixed part from "
                                "/dev/urandom; giving up after %d tries" % i)


    def iv_make (self):
        """
        Construct the IV from the current fixed part and the object counter.
        """
        return struct.pack (FMT_I2N_IV, self.fixed [-1], self.cnt)


    def next (self, filename=None, counter=None):
        """
        Prepare for encrypting the next incoming object. Update the counter
        and put together the IV, possibly changing prefixes. Then create the
        new encryptor.

        The argument ``counter`` can be used to specify a file counter for this
        object. Unless it is one of the reserved values, the counter of
        subsequent objects will be computed from this one.

        If this is the first object in a series, ``filename`` is required,
        otherwise it is reused if not present. The value is used to derive a
        header sized placeholder to use until after encryption when all the
        inputs to construct the final header are available. This is then
        matched in ``.done()`` against the value found at the position of the
        header. The motivation for this extra check is primarily to assist
        format debugging: It makes stray headers easy to spot in malformed
        PDTCRYPT files.

        Returns the placeholder header. Raises ``InvalidParameter`` on
        arguments of the wrong type, on a missing filename for the first
        object, and on a cipher that is not supported.
        """
        if filename is None:
            if self.lastinfo is None:
                raise InvalidParameter ("next: filename is mandatory for "
                                        "first object")
            filename, _dummy = self.lastinfo
        else:
            if isinstance (filename, str) is False:
                raise InvalidParameter ("next: filename must be a string, not %s"
                                        % type (filename))
        if counter is not None:
            if isinstance (counter, int) is False:
                raise InvalidParameter ("next: the supplied counter is of "
                                        "invalid type %s; please pass an "
                                        "integer instead" % type (counter))
            self.set_object_counter (counter)

        self.iv = self.iv_make ()
        if self.paramenc == "aes-gcm":
            self.enc = Cipher \
                ( algorithms.AES (self.key)
                , modes.GCM (self.iv)
                , backend = default_backend ()) \
                .encryptor ()
        elif self.paramenc == "passthrough":
            self.enc = PassthroughCipher ()
        else:
            raise InvalidParameter ("next: parameter version %d not known"
                                    % self.paramversion)
        hdrdum = hdr_make_dummy (filename)
        self.lastinfo = (filename, hdrdum)
        super().next (self.password, self.paramversion, self.nacl, self.iv)

        # the IV has been consumed; advance the counter for the next object
        self.set_object_counter (self.cnt + 1)
        return hdrdum


    def done (self, cmpdata):
        """
        Complete encryption of an object. After this has been called, attempts
        of encrypting further data will cause an error until ``.next()`` is
        invoked properly.

        ``cmpdata`` must equal the placeholder header that ``.next()``
        returned for this object, otherwise a ``RuntimeError`` is raised.

        Returns a triple consisting of the data returned by finalizing the
        cipher, the object header including the “late” values e. g. the
        ciphertext size and the GCM tag, and the list of IV fixed parts used
        by this context.
        """
        if isinstance (cmpdata, bytes) is False:
            raise InvalidParameter ("done: comparison input expected as bytes, "
                                    "not %s" % type (cmpdata))
        if self.lastinfo is None:
            raise RuntimeError ("done: encryption context not initialized")
        filename, hdrdum = self.lastinfo
        if cmpdata != hdrdum:
            raise RuntimeError ("done: bad sync of header for object %d: "
                                "preliminary data does not match; this likely "
                                "indicates a wrongly repositioned stream"
                                % self.cnt)
        data = self.enc.finalize ()
        self.stats ["out"] += len (data)
        self.ctsize += len (data)
        ok, hdr = hdr_from_params (self.version, self.paramversion, self.nacl,
                                   self.iv, self.ctsize, self.enc.tag)
        if ok is False:
            raise InternalError ("error constructing header: %r" % hdr)
        return data, hdr, self.fixed


    def process (self, buf):
        """
        Encrypt a chunk of plaintext with the active encryptor. Returns a pair
        of the size of the input consumed and the resulting ciphertext. The
        size **must** be checked downstream: if the maximum possible object
        size has been reached, only the leading part of ``buf`` is encrypted.
        The current context must then be finalized and a new one established
        before the caller can pass the remainder ``buf [size:]``.
        """
        if isinstance (buf, bytes) is False:
            raise InvalidParameter ("process: expected byte buffer, not %s"
                                    % type (buf))
        bsize = len (buf)
        newptsize = self.ptsize + bsize
        diff = newptsize - PDTCRYPT_MAX_OBJ_SIZE
        if diff > 0:
            # clamp to what still fits into the current object
            bsize -= diff
            newptsize = PDTCRYPT_MAX_OBJ_SIZE
        self.ptsize = newptsize
        data = super().process (buf [:bsize])
        self.ctsize += len (data)
        return bsize, data
cd77dadb
PG
1212
1213
class Decrypt (Crypto):
    """
    Decryption context. Objects are set up from their PDTCRYPT header with
    ``.next()``, fed through ``.process()`` and validated by ``.done()``.
    """

    tag     = None # GCM tag, part of header
    last_iv = None # (fixed part, counter) of the previous object; used to
                   # check for consecutive ivs in strict mode

    def __init__ (self, password=None, key=None, counter=None, fixedparts=None,
                  strict_ivs=False):
        """
        Sanitizing ctor for the decryption context. ``fixedparts`` specifies a
        list of IV fixed parts accepted during decryption. If a fixed part is
        encountered that is not in the list, decryption will fail.

        :param   password: mutually exclusive with ``key``
        :type    password: str
        :param        key: mutually exclusive with ``password``
        :type         key: bytes
        :type     counter: initial object counter the values
                           ``AES_GCM_IV_CNT_INFOFILE`` and
                           ``AES_GCM_IV_CNT_INDEX`` are unique in each backup set
                           and cannot be reused even with different fixed parts.
        :type  fixedparts: bytes list; note that the list is sorted in place
        :type  strict_ivs: bool
        :raises InvalidParameter: if an argument has the wrong type or value.
        """
        # exactly one of password and key must be given
        if (password is None) == (key is None):
            raise InvalidParameter ("__init__: need either key or password")

        if key is not None:
            if isinstance (key, bytes) is False:
                raise InvalidParameter ("__init__: key must be provided as "
                                        "bytes, not %s" % type (key))
        else: # password, no key
            if isinstance (password, str) is False:
                raise InvalidParameter ("__init__: password must be a string, not %s"
                                        % type (password))
            if len (password) == 0:
                raise InvalidParameter ("__init__: supplied empty password but not "
                                        "permitted for PDT encrypted files")
        # fixed parts
        if fixedparts is not None:
            if isinstance (fixedparts, list) is False:
                raise InvalidParameter ("__init__: IV fixed parts must be "
                                        "supplied as list, not %s"
                                        % type (fixedparts))
            self.fixed = fixedparts
            self.fixed.sort () # required by the bisection in valid_fixed_part()

        super().__init__ (password=password, key=key, counter=counter,
                          strict_ivs=strict_ivs)


    def valid_fixed_part (self, iv):
        """
        Check whether the fixed part of the given IV is contained in the
        sorted list of accepted fixed parts.
        """
        fixed, _cnt = struct.unpack (FMT_I2N_IV, iv)
        i = bisect.bisect_left (self.fixed, fixed)
        return i != len (self.fixed) and self.fixed [i] == fixed


    def check_consecutive_iv (self, iv):
        """
        Check whether the counter part of the given IV is indeed the successor
        of the currently present counter. This should always be the case for
        the objects in a well formed PDT archive but should not be enforced
        when decrypting out-of-order.

        The check is only performed in strict mode and only if the previous
        object used the same fixed part; a violation raises
        ``NonConsecutiveIV``. In any case the IV is remembered for the check
        of the next object.
        """
        fixed, cnt = struct.unpack (FMT_I2N_IV, iv)
        if self.strict_ivs is True \
                and self.last_iv is not None \
                and self.last_iv [0] == fixed \
                and self.last_iv [1] + 1 != cnt:
            raise NonConsecutiveIV ("iv %s counter not successor of "
                                    "last object (expected %d, found %d)"
                                    % (iv_fmt (iv), self.last_iv [1] + 1, cnt))
        # remember the *fixed part*, not the whole iv, so the comparison
        # above can actually match
        self.last_iv = (fixed, cnt)


    def next (self, hdr):
        """
        Start decrypting the next object. The PDTCRYPT header for the object
        can be given either as already parsed object or as bytes.

        Raises ``InvalidParameter`` or ``InvalidHeader`` if the header is
        unusable, ``InvalidIVFixedPart`` if a list of fixed parts is present
        and the IV does not match any of them, ``FormatError`` for an unknown
        parameter version and ``InternalError`` for an unknown cipher.
        """
        if isinstance (hdr, bytes) is True:
            hdr = hdr_read (hdr)
        elif isinstance (hdr, dict) is False:
            # this won’t catch malformed specs though
            raise InvalidParameter ("next: wrong type of parameter hdr: "
                                    "expected bytes or spec, got %s"
                                    % type (hdr))
        try:
            paramversion = hdr ["paramversion"]
            nacl         = hdr ["nacl"]
            iv           = hdr ["iv"]
            tag          = hdr ["tag"]
        except KeyError:
            raise InvalidHeader ("next: not a header %r" % hdr)

        super().next (self.password, paramversion, nacl, iv)
        if self.fixed is not None and self.valid_fixed_part (iv) is False:
            raise InvalidIVFixedPart ("iv %s has invalid fixed part"
                                      % iv_fmt (iv))
        self.check_consecutive_iv (iv)

        self.tag = tag
        defs = ENCRYPTION_PARAMETERS.get (paramversion, None)
        if defs is None:
            raise FormatError ("header contains unknown parameter version %d; "
                               "maybe the file was created by a more recent "
                               "version of Deltatar" % paramversion)
        enc = defs ["enc"]
        if enc == "aes-gcm":
            self.enc = Cipher \
                ( algorithms.AES (self.key)
                , modes.GCM (iv, tag=self.tag)
                , backend = default_backend ()) \
                . decryptor ()
        elif enc == "passthrough":
            self.enc = PassthroughCipher ()
        else:
            raise InternalError ("encryption parameter set %d refers to unknown "
                                 "mode %r" % (paramversion, enc))
        self.set_object_counter (self.cnt + 1)


    def done (self, tag=None):
        """
        Stop decryption of the current object and finalize it with the active
        context. This will throw an *InvalidGCMTag* exception to indicate that
        the authentication tag does not match the data. If the tag is correct,
        the rest of the plaintext is returned.

        If ``tag`` is passed it must be a bytes object; the object is then
        finalized against that tag instead of the one taken from the header.
        """
        data = b""
        try:
            if tag is None:
                data = self.enc.finalize ()
            else:
                if isinstance (tag, bytes) is False:
                    raise InvalidParameter ("done: wrong type of parameter "
                                            "tag: expected bytes, got %s"
                                            % type (tag))
                # use the tag supplied by the caller, not the stored one
                data = self.enc.finalize_with_tag (tag)
        except cryptography.exceptions.InvalidTag:
            checked = self.tag if tag is None else tag
            raise InvalidGCMTag ("done: tag mismatch of object %d: %s "
                                 "rejected by finalize ()"
                                 % (self.cnt, binascii.hexlify (checked)))
        self.ctsize += len (data)
        self.stats ["out"] += len (data)
        return data


    def process (self, buf):
        """
        Decrypt the bytes object *buf* with the active decryptor and return
        the plaintext. Raises ``InvalidParameter`` if *buf* is not a bytes
        object.
        """
        if isinstance (buf, bytes) is False:
            raise InvalidParameter ("process: expected byte buffer, not %s"
                                    % type (buf))
        self.ctsize += len (buf)
        data = super().process (buf)
        self.ptsize += len (data)
        return data
1376
1377
00b3cd10 1378###############################################################################
770173c5
PG
1379## testing helpers
1380###############################################################################
1381
def _patch_global (glob, vow, n=None):
    """
    Overwrite the module level variable named *glob* with *n* for testing
    purposes, e. g. to adapt the upper file counter bound when testing the IV
    logic. Without *n* the variable is reset to the value of the global
    *glob* + ``"_DEFAULT"``. Returns the value that was replaced.

    Completely unsafe, hence the caller must state the *vow*.
    """
    assert vow == "I am fully aware that this will void my warranty."
    namespace = globals ()
    previous  = namespace [glob]
    namespace [glob] = namespace [glob + "_DEFAULT"] if n is None else n
    return previous
1392
cb7a3911
PG
# testing hooks: each takes the vow and, optionally, the new value
_testing_set_AES_GCM_IV_CNT_MAX    = partial (_patch_global, "AES_GCM_IV_CNT_MAX")
_testing_set_PDTCRYPT_MAX_OBJ_SIZE = partial (_patch_global, "PDTCRYPT_MAX_OBJ_SIZE")
1398
a808459e
PG
def open2_dump_file (fname, dir_fd, force=False):
    """
    Create the file *fname* relative to the directory descriptor *dir_fd* and
    open it for writing, readable and writable by the owner only. Returns the
    new file descriptor.

    An existing file is truncated if *force* is ``True``; otherwise the
    function refuses to touch it and raises a ``RuntimeError``.
    """
    clobber = os.O_TRUNC if force is True else os.O_EXCL
    oflags  = os.O_CREAT | os.O_WRONLY | clobber
    perms   = stat.S_IRUSR | stat.S_IWUSR

    try:
        outfd = os.open (fname, oflags, perms, dir_fd=dir_fd)
    except FileExistsError:
        noise ("PDT: refusing to overwrite existing file %s" % fname)
        noise ("")
        raise RuntimeError ("destination file %s already exists" % fname)

    if PDTCRYPT_VERBOSE is True:
        noise ("PDT: new output file %s (fd=%d)" % (fname, outfd))

    return outfd
1419
770173c5 1420###############################################################################
00b3cd10
PG
1421## freestanding invocation
1422###############################################################################
1423
da82bc58
PG
# subcommands of the command line interface
PDTCRYPT_SUB_PROCESS, PDTCRYPT_SUB_SCRYPT, PDTCRYPT_SUB_SCAN = range (3)

PDTCRYPT_SUB = {
    "process" : PDTCRYPT_SUB_PROCESS,
    "scrypt"  : PDTCRYPT_SUB_SCRYPT,
    "scan"    : PDTCRYPT_SUB_SCAN,
}

# kinds of secret
PDTCRYPT_SECRET_PW, PDTCRYPT_SECRET_KEY = range (2)

# mode flags
PDTCRYPT_DECRYPT = 1 << 0 # decrypt archive with password
PDTCRYPT_SPLIT   = 1 << 1 # split archive into individual objects
PDTCRYPT_HASH    = 1 << 2 # output scrypt hash for file and given password

# templates for the names of output files; take the object number
PDTCRYPT_SPLITNAME  = "pdtcrypt-object-%d.bin"
PDTCRYPT_RESCUENAME = "pdtcrypt-rescue-object-%0.5d.bin"

# global settings
PDTCRYPT_VERBOSE   = False
PDTCRYPT_STRICTIVS = False
PDTCRYPT_OVERWRITE = False
PDTCRYPT_BLOCKSIZE = 1 << 12

# kinds of stream
PDTCRYPT_SINK, PDTCRYPT_SOURCE = range (2)

SELF = None

PDTCRYPT_DEFAULT_VER  = 1
PDTCRYPT_DEFAULT_PVER = 1

# scrypt hashing output control
PDTCRYPT_SCRYPT_INTRANATOR, PDTCRYPT_SCRYPT_PARAMETERS = range (2)
PDTCRYPT_SCRYPT_DEFAULT = PDTCRYPT_SCRYPT_INTRANATOR

PDTCRYPT_SCRYPT_FORMAT = {
    "i2n"    : PDTCRYPT_SCRYPT_INTRANATOR,
    "params" : PDTCRYPT_SCRYPT_PARAMETERS,
}

PDTCRYPT_TT_COLUMNS = 80 # assume standard terminal
15d3eefd
PG
1464
class PDTDecryptionError (Exception):
    """Raised by the command line decryption when the input is unusable."""
1467
e3abcdf0
PG
class PDTSplitError (Exception):
    """Splitting the archive into individual objects failed."""
1470
15d3eefd
PG
1471
def noise (*a, **b):
    """Like ``print ()`` but the output goes to stderr."""
    print (*a, file=sys.stderr, **b)
15d3eefd
PG
1474
1475
89e1073c
PG
class PassthroughDecryptor (object):
    """
    Stand-in for a decryptor that does not decrypt: the data passed in is
    handed back unchanged, preceded by the header of the respective object.
    """

    curhdr = None # write current header on first data write

    def __init__ (self):
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT: no encryption; data passthrough")

    def next (self, hdr):
        """Serialize *hdr* and keep it around for the next data write."""
        ok, serialized = hdr_make (hdr)
        if ok is False:
            raise PDTDecryptionError ("bad header %r" % hdr)
        self.curhdr = serialized

    def done (self):
        """Return the header if it has not been written yet, else nothing."""
        return b"" if self.curhdr is None else self.curhdr

    def process (self, d):
        """Return *d*, prefixed with the pending header if there is one."""
        pending = self.curhdr
        if pending is None:
            return d
        merged = pending + d
        self.curhdr = None
        return merged
1500
1501
def depdtcrypt (mode, secret, ins, outs):
    """
    Remove PDTCRYPT layer from all objects encrypted with the secret. Used on a
    Deltatar backup this will yield a (possibly Gzip compressed) tarball.

    :param   mode: bit field; ``PDTCRYPT_DECRYPT`` requests decryption
                   (otherwise the data is passed through), ``PDTCRYPT_SPLIT``
                   requests one output file per object
    :param secret: pair of the kind of secret (``PDTCRYPT_SECRET_PW`` or
                   ``PDTCRYPT_SECRET_KEY``) and its value; a key is expected
                   in hexadecimal notation
    :param    ins: binary stream to read the PDTCRYPT objects from
    :param   outs: binary stream to write to or, in split mode, the file
                   descriptor of the destination directory
    :returns:      the totals of bytes read, objects, ciphertext bytes and
                   plaintext bytes
    """
    import errno # ``os.errno`` is not available anymore as of Python 3.7

    ctleft     = -1   # length of ciphertext to consume
    ctcurrent  = 0    # total ciphertext of current object
    total_obj  = 0    # total number of objects read
    total_pt   = 0    # total plaintext bytes
    total_ct   = 0    # total ciphertext bytes
    total_read = 0    # total bytes read
    outfile    = None # Python file object for output

    if mode & PDTCRYPT_DECRYPT: # decryptor
        ks = secret [0]
        if ks == PDTCRYPT_SECRET_PW:
            decr = Decrypt (password=secret [1], strict_ivs=PDTCRYPT_STRICTIVS)
        elif ks == PDTCRYPT_SECRET_KEY:
            key = binascii.unhexlify (secret [1])
            decr = Decrypt (key=key, strict_ivs=PDTCRYPT_STRICTIVS)
        else:
            raise InternalError ("‘%d’ does not specify a valid kind of secret"
                                 % ks)
    else:
        decr = PassthroughDecryptor ()

    def nextout (_):
        """Dummy for non-split mode: output file does not vary."""
        return outs

    if mode & PDTCRYPT_SPLIT:
        def nextout (outfile):
            """
            We were passed an fd as outs for accessing the destination
            directory where extracted archive components are supposed
            to end up in.
            """

            if outfile is None:
                if PDTCRYPT_VERBOSE is True:
                    noise ("PDT: no output file to close at this point")
            else:
                if PDTCRYPT_VERBOSE is True:
                    noise ("PDT: release output file %r" % outfile)
                # cleanup happens automatically by the GC; the next
                # line will error out on account of an invalid fd
                #outfile.close ()

            assert total_obj > 0
            fname = PDTCRYPT_SPLITNAME % total_obj
            try:
                outfd = open2_dump_file (fname, outs, force=PDTCRYPT_OVERWRITE)
            except RuntimeError as exn:
                raise PDTSplitError (exn)
            return os.fdopen (outfd, "wb", closefd=True)


    def tell (s):
        """
        Position of stream *s* or -1 if it cannot be determined: ESPIPE is
        normal on non-seekable stdio stream. Other errors are passed on.
        """
        try:
            return s.tell ()
        except OSError as exn:
            if exn.errno == errno.ESPIPE:
                return -1
            raise

    def out (pt, outfile):
        """Write plaintext *pt* to *outfile*; account for the bytes."""
        nonlocal total_pt
        npt = len (pt)
        total_pt += npt
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT:\t· decrypt plaintext %d B" % (npt))
        try:
            nn = outfile.write (pt)
        except OSError as exn: # probably ENOSPC
            raise DecryptionError ("error (%s)" % exn)
        if nn != npt:
            raise DecryptionError ("write aborted after %d of %d B" % (nn, npt))

    while True:
        if ctleft <= 0:
            # current object completed; in a valid archive this marks either
            # the start of a new header or the end of the input
            if ctleft == 0: # current object requires finalization
                if PDTCRYPT_VERBOSE is True:
                    noise ("PDT: %d finalize" % tell (ins))
                try:
                    pt = decr.done ()
                except InvalidGCMTag as exn:
                    # report the size of the object; there is no plaintext
                    # from the failed finalization to measure
                    raise DecryptionError ("error finalizing object %d (%d B): "
                                           "%r" % (total_obj, ctcurrent, exn)) \
                          from exn
                out (pt, outfile)
                if PDTCRYPT_VERBOSE is True:
                    noise ("PDT:\t· object validated")

            if PDTCRYPT_VERBOSE is True:
                noise ("PDT: %d hdr" % tell (ins))
            try:
                hdr = hdr_read_stream (ins)
                total_read += PDTCRYPT_HDR_SIZE
            except EndOfFile as exn:
                total_read += exn.remainder
                if total_ct + total_obj * PDTCRYPT_HDR_SIZE != total_read:
                    raise PDTDecryptionError ("ciphertext processed (%d B) plus "
                                              "overhead (%d × %d B) does not match "
                                              "the number of bytes read (%d )"
                                              % (total_ct, total_obj, PDTCRYPT_HDR_SIZE,
                                                 total_read))
                # the single good exit
                return total_read, total_obj, total_ct, total_pt
            except InvalidHeader as exn:
                raise PDTDecryptionError ("invalid header at position %d in %r "
                                          "(%s)" % (tell (ins), exn, ins))
            if PDTCRYPT_VERBOSE is True:
                pretty = hdr_fmt_pretty (hdr)
                noise ("\n".join ("PDT:\t· " + e for e in pretty.splitlines ()))
            ctcurrent = ctleft = hdr ["ctsize"]

            decr.next (hdr)

            total_obj += 1 # used in file counter with split mode

            # finalization complete or skipped in case of first object in
            # stream; create a new output file if necessary
            outfile = nextout (outfile)

            if PDTCRYPT_VERBOSE is True:
                noise ("PDT: %d decrypt obj no. %d,  %d B"
                       % (tell (ins), total_obj, ctleft))

        # always allocate a new buffer since python-cryptography doesn’t allow
        # passing a bytearray :/
        nexpect = min (ctleft, PDTCRYPT_BLOCKSIZE)
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT:\t· [%d] %d%% done, read block (%d B of %d B remaining)"
                   % (tell (ins),
                      100 - ctleft * 100 / (ctcurrent if ctcurrent > 0 else 1),
                      nexpect, ctleft))
        ct = ins.read (nexpect)
        nct = len (ct)
        if nct < nexpect:
            off = tell (ins)
            raise EndOfFile (nct,
                             "hit EOF after %d of %d B in block [%d:%d); "
                             "%d B ciphertext remaining for object no %d"
                             % (nct, nexpect, off, off + nexpect, ctleft,
                                total_obj))
        ctleft     -= nct
        total_ct   += nct
        total_read += nct

        if PDTCRYPT_VERBOSE is True:
            noise ("PDT:\t· decrypt ciphertext %d B" % (nct))
        pt = decr.process (ct)
        out (pt, outfile)
15d3eefd 1658
d6c15a52 1659
def deptdcrypt_mk_stream (kind, path):
    """
    Create stream from file or stdio descriptor.

    *kind* is either ``PDTCRYPT_SINK`` for an output or ``PDTCRYPT_SOURCE``
    for an input stream; anything else raises a ``ValueError``. The path
    ``-`` designates the binary buffer of stdout or stdin, respectively.
    """
    verbose = PDTCRYPT_VERBOSE is True

    if kind == PDTCRYPT_SINK:
        if path == "-":
            if verbose:
                noise ("PDT: sink: stdout")
            return sys.stdout.buffer
        if verbose:
            noise ("PDT: sink: file %s" % path)
        return io.FileIO (path, "w")

    if kind == PDTCRYPT_SOURCE:
        if path == "-":
            if verbose:
                noise ("PDT: source: stdin")
            return sys.stdin.buffer
        if verbose:
            noise ("PDT: source: file %s" % path)
        return io.FileIO (path, "r")

    raise ValueError ("bogus stream “%s” / %s" % (kind, path))
1678
15d3eefd 1679
def mode_depdtcrypt (mode, secret, ins, outs):
    """
    Command line front end of ``depdtcrypt ()``: run it with the arguments
    given and report ``DecryptionError`` and ``PDTSplitError`` to the user.
    Returns zero on success, one if either error was caught.
    """
    def complain (headline, exn, hint):
        """Output the error report on stderr; yield the exit status."""
        noise (headline)
        noise ("PDT:")
        noise ("PDT:    “%s”" % exn)
        noise ("PDT:")
        noise (hint)
        noise ("")
        return 1

    try:
        totals = depdtcrypt (mode, secret, ins, outs)
    except DecryptionError as exn:
        return complain ("PDT: Decryption failed:", exn,
                         "PDT: Did you specify the correct key / password?")
    except PDTSplitError as exn:
        return complain ("PDT: Split operation failed:", exn,
                         "PDT: Hint: target directory should be empty.")

    total_read, total_obj, total_ct, total_pt = totals

    if PDTCRYPT_VERBOSE is True:
        noise ("PDT: decryption successful" )
        summary = ( ("PDT:   %.10d bytes read"        , total_read)
                  , ("PDT:   %.10d objects decrypted" , total_obj )
                  , ("PDT:   %.10d bytes ciphertext"  , total_ct  )
                  , ("PDT:   %.10d bytes plaintext"   , total_pt  ))
        for template, value in summary:
            noise (template % value)
        noise ("")

    return 0
1710
1711
def mode_scrypt (pw, ins=None, nacl=None, fmt=PDTCRYPT_SCRYPT_INTRANATOR):
    """
    Print the key hashed from the password *pw* as JSON on stdout.

    If the input *ins* is given, hash, salt and versions are obtained from
    ``scrypt_hashsource ()``. Otherwise the salt *nacl* must be passed in
    hexadecimal notation and the default versions apply. *fmt* selects the
    output scheme: ``PDTCRYPT_SCRYPT_INTRANATOR`` yields Base64 encoded salt
    and key plus the parameter version, ``PDTCRYPT_SCRYPT_PARAMETERS`` yields
    hex encoded salt and key plus the version and the scrypt parameters. Any
    other value raises a ``RuntimeError``.
    """
    import json

    hsh = None
    if ins is None:
        nacl         = binascii.unhexlify (nacl)
        version      = PDTCRYPT_DEFAULT_VER
        paramversion = PDTCRYPT_DEFAULT_PVER
    else:
        hsh, nacl, version, paramversion = scrypt_hashsource (pw, ins)
    defs = ENCRYPTION_PARAMETERS.get (paramversion, None)

    kdfname, params = defs ["kdf"]
    if hsh is None:
        # no input to take the hash from: derive it here
        kdf = kdf_by_version (None, defs)
        hsh, _void = kdf (pw, nacl)

    if fmt == PDTCRYPT_SCRYPT_INTRANATOR:
        result = { "salt"         : base64.b64encode (nacl).decode ()
                 , "key"          : base64.b64encode (hsh) .decode ()
                 , "paramversion" : paramversion }
    elif fmt == PDTCRYPT_SCRYPT_PARAMETERS:
        scrypt_params = { name : params [name]
                          for name in ("N", "r", "p", "dkLen") }
        result = { "salt"          : binascii.hexlify (nacl).decode ()
                 , "key"           : binascii.hexlify (hsh) .decode ()
                 , "version"       : version
                 , "scrypt_params" : scrypt_params }
    else:
        raise RuntimeError ("bad scrypt output scheme %r" % fmt)

    print (json.dumps (result))
1746
1747
4c62ddc0
PG
def noise_output_candidates (cands, indent=8, cols=PDTCRYPT_TT_COLUMNS):
    """
    Print a list of offsets without garbling the terminal too much.

    The offsets are output as comma separated decimal numbers; a new line is
    started whenever the next number would exceed the width *cols* - 1.

    The indent is counted from column zero; if it is wide enough, the “PDT: ”
    marker will be prepended, considered part of the indentation. An indent
    that leaves no room for output raises a ``ValueError``.
    """
    width = cols - 1
    len (cands) # the candidates must be passed as a sized collection
    if indent < 5:
        prefix = " " * indent
    else:
        prefix = "PDT: " + " " * (indent - 5)
    comma = ","
    text  = prefix # line under construction
    col   = indent # column the line extends to
    first = True   # prevent leading separator

    if indent >= width:
        raise ValueError ("the requested indentation exceeds the line "
                          "width by %d" % (indent - width))

    for cand in cands:
        digits = "%d" % cand
        if not first:
            text += comma
            col  += len (comma)

        col += len (digits)
        if col > width: # line break
            noise (text)
            text = prefix
            col  = indent + len (digits)
        elif first:
            first = False
        else: # space
            text += ' '
            col  += 1

        text += digits

    if col != indent:
        noise (text)
1790
1791
def mode_scan (secret, fname, outs=None, nacl=None):
    """
    Dissect a binary file, looking for PDTCRYPT headers and objects.

    If *outs* is supplied, recoverable data will be dumped into the specified
    directory.

    *secret* is passed on to ``try_decrypt ()`` for every candidate whose
    header could be parsed. *fname* is opened read-only. *nacl* is accepted
    for interface compatibility and not used here.

    Returns -1 if the input contains no header candidates at all; otherwise
    the result of each candidate is reported through ``noise ()`` and nothing
    is returned. If the input file cannot be opened, the usage message is
    printed and the process exits.
    """
    try:
        ifd = os.open (fname, os.O_RDONLY)
    except OSError:
        noise ("PDT: failed to open %s readonly" % fname)
        noise ("")
        usage (err=True) # terminates the process
        raise Unreachable

    junk = []
    try: # single guard so the input is closed on every way out
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT: scan for potential sync points")
        cands = locate_hdr_candidates (ifd)
        if len (cands) == 0:
            noise ("PDT: scan complete: input does not contain potential PDT "
                   "headers; giving up.")
            return -1
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT: scan complete: found %d candidates:" % len (cands))
            noise_output_candidates (cands)

        # objects are numbered by candidate, unparseable ones included
        for nobj, cand in enumerate (cands, 1):
            vdt, hdr = inspect_hdr (ifd, cand)
            if vdt == HDR_CAND_JUNK:
                junk.append (cand)
                continue

            off0 = cand + PDTCRYPT_HDR_SIZE
            if PDTCRYPT_VERBOSE is True:
                noise ("PDT: obj %d: read payload @%d" % (nobj, off0))
                pretty = hdr_fmt_pretty (hdr)
                noise ("\n".join ("PDT:\t· " + ln
                                  for ln in pretty.splitlines ()))

            ofd = -1
            if outs is not None:
                ofname = PDTCRYPT_RESCUENAME % nobj
                ofd = open2_dump_file (ofname, outs, force=PDTCRYPT_OVERWRITE)

            try:
                # payload counts as good if exactly ctsize bytes were handled
                ok = try_decrypt (ifd, off0, hdr, secret, ofd=ofd) \
                        == hdr ["ctsize"]
            finally:
                if ofd != -1:
                    os.close (ofd)

            if vdt == HDR_CAND_GOOD and ok is True:
                noise ("PDT: %d → ✓ valid object %d–%d"
                       % (cand, off0, off0 + hdr ["ctsize"]))
            elif vdt == HDR_CAND_FISHY and ok is True:
                noise ("PDT: %d → × object %d–%d, corrupt header"
                       % (cand, off0, off0 + hdr ["ctsize"]))
            elif vdt == HDR_CAND_GOOD and ok is False:
                noise ("PDT: %d → × object %d–%d, problematic payload"
                       % (cand, off0, off0 + hdr ["ctsize"]))
            elif vdt == HDR_CAND_FISHY and ok is False:
                noise ("PDT: %d → × object %d–%d, corrupt header, problematic "
                       "ciphertext" % (cand, off0, off0 + hdr ["ctsize"]))
            else:
                raise Unreachable
    finally:
        os.close (ifd)

    if len (junk) == 0:
        noise ("PDT: all headers ok")
    else:
        noise ("PDT: %d candidates not parseable as headers:" % len (junk))
        noise_output_candidates (junk)
1869
70ad9458
PG
def usage (err=False):
    """
    Print the command line synopsis and terminate the process.

    With *err* set to ``True`` the text is emitted through ``noise ()`` and
    the exit status is 42; otherwise it is printed with ``print ()`` and the
    exit status is 0.
    """
    out    = noise if err is True else print
    indent = ' ' * len (SELF) # align continuation lines below the program name
    out ("usage: %s SUBCOMMAND { --help" % SELF)
    out (" %s | [ -v ] { -p PASSWORD | -k KEY }" % indent)
    out (" %s [ { -i | --in } { - | SOURCE } ]" % indent)
    out (" %s [ { -n | --nacl } { SALT } ]" % indent)
    out (" %s [ { -o | --out } { - | DESTINATION } ]" % indent)
    out (" %s [ -D | --no-decrypt ] [ -S | --split ]" % indent)
    out (" %s [ -f | --format ]" % indent)
    out ("")
    out ("\twhere")
    out ("\t\tSUBCOMMAND main mode: { process | scrypt }")
    out ("\t\t where:")
    out ("\t\t process: extract objects from PDT archive")
    out ("\t\t scrypt: calculate hash from password and first object")
    out ("\t\t-p PASSWORD password to derive the encryption key from")
    out ("\t\t-k KEY encryption key as 16 bytes in hexadecimal notation")
    out ("\t\t-s enforce strict handling of initialization vectors")
    out ("\t\t-i SOURCE file name to read from")
    out ("\t\t-o DESTINATION file to write output to")
    out ("\t\t-n SALT provide salt for scrypt mode in hex encoding")
    out ("\t\t-v print extra info")
    out ("\t\t-S split into files at object boundaries; this")
    out ("\t\t requires DESTINATION to refer to directory")
    out ("\t\t-D PDT header and ciphertext passthrough")
    out ("\t\t-f format of SCRYPT hash output (“default” or “parameters”)")
    out ("")
    out ("\tinstead of filenames, “-” may be used to specify stdin / stdout")
    out ("")
    sys.exit (42 if err is True else 0)
1903
1904
a83fa4ed
PG
def bail (msg):
    """
    Report *msg* followed by an empty line through ``noise ()``, then hand
    over to ``usage (err=True)``. Control is not expected to come back from
    there, hence the ``Unreachable`` raised as a safeguard.
    """
    for text in (msg, ""):
        noise (text)
    usage (err=True)
    raise Unreachable
1910
1911
def parse_argv (argv):
    """
    Parse the command line *argv* and prepare the requested operation.

    The first element of *argv* is taken as the program name, the second
    selects the subcommand via ``PDTCRYPT_SUB``. The remaining arguments are
    options; which ones are accepted depends on the subcommand. If neither
    password nor key were given on the command line, the environment
    variables ``PDTCRYPT_PASSWORD`` and ``PDTCRYPT_KEY`` are consulted, in
    that order.

    As a side effect the globals ``SELF``, ``PDTCRYPT_VERBOSE``,
    ``PDTCRYPT_STRICTIVS`` and ``PDTCRYPT_OVERWRITE`` are set according to
    the arguments, and input / output streams or the target directory are
    opened.

    Returns a pair ``(True, runner)`` where *runner* is a callable without
    arguments that carries out the operation. Invalid input is reported via
    ``bail ()`` which prints the usage and exits.
    """
    global PDTCRYPT_OVERWRITE
    global PDTCRYPT_STRICTIVS
    global PDTCRYPT_VERBOSE
    global SELF

    mode          = PDTCRYPT_DECRYPT
    secret        = None
    insspec       = None
    outsspec      = None
    outs          = None
    nacl          = None
    scrypt_format = PDTCRYPT_SCRYPT_DEFAULT

    argvi = iter (argv)
    SELF  = os.path.basename (next (argvi))

    try:
        rawsubcmd  = next (argvi)
        subcommand = PDTCRYPT_SUB [rawsubcmd]
    except StopIteration:
        bail ("ERROR: subcommand required")
    except KeyError:
        bail ("ERROR: invalid subcommand “%s” specified" % rawsubcmd)

    def say (msg):
        """Emit *msg* only in verbose mode."""
        if PDTCRYPT_VERBOSE is True:
            noise (msg)

    def checked_arg ():
        """Fetch the value belonging to the current option."""
        try:
            return next (argvi)
        except StopIteration:
            bail ("ERROR: argument list incomplete")

    def checked_secret (t, value, origin=None):
        """
        Store the secret unless one was given already. On conflict, only
        *origin* (the option that supplied it) is reported -- the secret
        itself must never end up in diagnostics.
        """
        nonlocal secret
        if secret is None:
            secret = (t, value)
        else:
            bail ("ERROR: encountered “%s” but secret already given" % origin)

    for arg in argvi:
        if arg in [ "-h", "--help" ]:
            usage ()
            raise Unreachable
        elif arg in [ "-v", "--verbose", "--wtf" ]:
            PDTCRYPT_VERBOSE = True
        elif arg in [ "-i", "--in", "--source" ]:
            insspec = checked_arg ()
            say ("PDT: decrypt from %s" % insspec)
        elif arg in [ "-p", "--password" ]:
            checked_secret (PDTCRYPT_SECRET_PW, checked_arg (), arg)
            say ("PDT: decrypting with password")
        elif subcommand == PDTCRYPT_SUB_PROCESS:
            if arg in [ "-s", "--strict-ivs" ]:
                PDTCRYPT_STRICTIVS = True
            elif arg in [ "-o", "--out", "--dest", "--sink" ]:
                outsspec = checked_arg ()
                say ("PDT: decrypt to %s" % outsspec)
            elif arg in [ "-f", "--force" ]:
                PDTCRYPT_OVERWRITE = True
                say ("PDT: overwrite existing files")
            elif arg in [ "-S", "--split" ]:
                mode |= PDTCRYPT_SPLIT
                say ("PDT: split files")
            elif arg in [ "-D", "--no-decrypt" ]:
                mode &= ~PDTCRYPT_DECRYPT
                say ("PDT: not decrypting")
            elif arg in [ "-k", "--key" ]:
                checked_secret (PDTCRYPT_SECRET_KEY, checked_arg (), arg)
                say ("PDT: decrypting with key")
            else:
                bail ("ERROR: unexpected positional argument “%s”" % arg)
        elif subcommand == PDTCRYPT_SUB_SCRYPT:
            if arg in [ "-n", "--nacl", "--salt" ]:
                nacl = checked_arg ()
                say ("PDT: salt key with %s" % nacl)
            elif arg in [ "-f", "--format" ]:
                arg = checked_arg ()
                try:
                    scrypt_format = PDTCRYPT_SCRYPT_FORMAT [arg]
                except KeyError:
                    bail ("ERROR: invalid scrypt output format %s" % arg)
                say ("PDT: scrypt output format “%s”" % scrypt_format)
            else:
                bail ("ERROR: unexpected positional argument “%s”" % arg)
        elif subcommand == PDTCRYPT_SUB_SCAN:
            if arg in [ "-o", "--out", "--dest", "--sink" ]:
                outsspec = checked_arg ()
                say ("PDT: decrypt to %s" % outsspec)
            elif arg in [ "-f", "--force" ]:
                PDTCRYPT_OVERWRITE = True
                say ("PDT: overwrite existing files")
            else:
                bail ("ERROR: unexpected positional argument “%s”" % arg)

    # no secret on the command line: fall back to the environment
    if secret is None:
        say ("ERROR: no password or key specified, trying $PDTCRYPT_PASSWORD")
        epw = os.getenv ("PDTCRYPT_PASSWORD")
        if epw is not None:
            checked_secret (PDTCRYPT_SECRET_PW, epw.strip (),
                            "$PDTCRYPT_PASSWORD")

    if secret is None:
        say ("ERROR: no password or key specified, trying $PDTCRYPT_KEY")
        ek = os.getenv ("PDTCRYPT_KEY")
        if ek is not None:
            checked_secret (PDTCRYPT_SECRET_KEY, ek.strip (), "$PDTCRYPT_KEY")

    if secret is None:
        if subcommand == PDTCRYPT_SUB_SCRYPT:
            bail ("ERROR: scrypt hash mode requested but no password given")
        elif mode & PDTCRYPT_DECRYPT:
            bail ("ERROR: encryption requested but no password given")

    if mode & PDTCRYPT_SPLIT and outsspec is None:
        bail ("ERROR: split mode is incompatible with stdout sink "
              "(the default)")

    if subcommand == PDTCRYPT_SUB_SCAN and outsspec is None:
        pass # no output by default in scan mode
    elif mode & PDTCRYPT_SPLIT or subcommand == PDTCRYPT_SUB_SCAN:
        # destination must be directory
        if outsspec == "-":
            bail ("ERROR: mode is incompatible with stdout sink")
        try:
            try:
                os.makedirs (outsspec, 0o700)
            except FileExistsError:
                # if it’s a directory with appropriate perms, everything is
                # good; otherwise, below invocation of open(2) will fail
                pass
            outs = os.open (outsspec, os.O_DIRECTORY, 0o600)
        except FileNotFoundError:
            bail ("ERROR: cannot create target directory “%s”" % outsspec)
        except NotADirectoryError:
            bail ("ERROR: target path “%s” is not a directory" % outsspec)
    else:
        outs = deptdcrypt_mk_stream (PDTCRYPT_SINK, outsspec or "-")

    if subcommand == PDTCRYPT_SUB_SCAN:
        if insspec is None:
            bail ("ERROR: please supply an input file for scanning")
        if insspec == '-':
            bail ("ERROR: input must be seekable; please specify a file")
        return True, partial (mode_scan, secret, insspec, outs, nacl=nacl)

    if subcommand == PDTCRYPT_SUB_SCRYPT:
        if secret [0] == PDTCRYPT_SECRET_KEY:
            bail ("ERROR: scrypt mode requires a password")
        # exactly one of input file and salt must be present
        if (insspec is None) == (nacl is None):
            bail ("ERROR: please supply either an input file or "
                  "the salt")

    # default to stdin; scrypt mode reads input only if a file was named
    ins = None
    if insspec is not None or subcommand != PDTCRYPT_SUB_SCRYPT:
        ins = deptdcrypt_mk_stream (PDTCRYPT_SOURCE, insspec or "-")

    if subcommand == PDTCRYPT_SUB_SCRYPT:
        return True, partial (mode_scrypt, secret [1].encode (), ins, nacl,
                              fmt=scrypt_format)

    return True, partial (mode_depdtcrypt, mode, secret, ins, outs)
15d3eefd
PG
2079
2080
def main (argv):
    """
    Entry point: have ``parse_argv ()`` digest *argv*, then invoke the runner
    it hands back and pass on that runner’s result. Yields 1 if argument
    parsing did not signal success.
    """
    success, action = parse_argv (argv)

    if success is not True:
        return 1

    return action ()
f08c604b 2087
00b3cd10
PG
2088
if __name__ == "__main__":
    # script invocation: the result of main () becomes the exit status
    raise SystemExit (main (sys.argv))
2091