| 1 | #!/usr/bin/env python3 |
| 2 | |
| 3 | """ |
| 4 | Intra2net 2017 |
| 5 | |
| 6 | =============================================================================== |
| 7 | crypto -- Encryption Layer for the Deltatar Backup |
| 8 | =============================================================================== |
| 9 | |
| 10 | Crypto stack: |
| 11 | |
| 12 | - AES-GCM for the symmetric encryption; |
| 13 | - Scrypt as KDF. |
| 14 | |
| 15 | References: |
| 16 | |
| 17 | - NIST Recommendation for Block Cipher Modes of Operation: Galois/Counter |
| 18 | Mode (GCM) and GMAC |
| 19 | http://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf |
| 20 | |
| 21 | - AES-GCM v1: |
| 22 | https://cryptome.org/2014/01/aes-gcm-v1.pdf |
| 23 | |
| 24 | - Authentication weaknesses in GCM |
| 25 | http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/comments/CWC-GCM/Ferguson2.pdf |
| 26 | |
| 27 | Errors |
| 28 | ------------------------------------------------------------------------------- |
| 29 | |
| 30 | Errors fall into roughly three categories: |
| 31 | |
| 32 | - Cryptographical errors or invalid data. |
| 33 | |
| 34 | - ``InvalidGCMTag`` (decryption failed on account of an invalid GCM |
| 35 | tag), |
| 36 | - ``InvalidIVFixedPart`` (IV fixed part of object not found in list), |
| 37 | - ``DuplicateIV`` (the IV of an object encrypted earlier was reused), |
| 38 | - ``NonConsecutiveIV`` (IVs of two encrypted objects are not |
| 39 | consecutive), |
| 40 | - ``DecryptionError`` (used in CLI decryption for presenting error |
| 41 | conditions to the user). |
| 42 | |
| 43 | - Incorrect usage of the library. |
| 44 | |
| 45 | - ``InvalidParameter`` (non-conforming user supplied parameter), |
| 46 | - ``InvalidHeader`` (data passed for reading not parsable into header), |
| 47 | - ``FormatError`` (cannot handle header or parameter version), |
| 48 | - ``RuntimeError``. |
| 49 | |
| 50 | - Bad internal state. If one of these is encountered it means that a state |
| 51 | was reached that shouldn’t occur during normal processing. |
| 52 | |
| 53 | - ``InternalError``, |
| 54 | - ``Unreachable``. |
| 55 | |
| 56 | Also, ``EndOfFile`` is used as a sentinel to communicate that a stream supplied |
| 57 | for reading is exhausted. |
| 58 | |
| 59 | Initialization Vectors |
| 60 | ------------------------------------------------------------------------------- |
| 61 | |
| 62 | Initialization vectors are checked for reuse during the lifetime of a decryptor. |
| 63 | The fixed counters for metadata files cannot be reused and attempts to do so |
| 64 | will cause a DuplicateIV error. This means the length of objects encrypted with |
| 65 | a metadata counter is capped at 63 GB. |
| 66 | |
| 67 | For ordinary, non-metadata payload, there is an optional mode with strict IV |
| 68 | checking that causes a crypto context to fail if an IV encountered or created |
| 69 | was already used for decrypting or encrypting, respectively, an earlier object. |
| 70 | Note that this mode can trigger false positives when decrypting non-linearly, |
| 71 | e. g. when traversing the same object multiple times. Since the crypto context |
| 72 | has no notion of a position in a PDT encrypted archive, this condition must be |
| 73 | sorted out downstream. |
| 74 | |
| 75 | When encrypting with more than one Encrypt context special care must be taken |
| 76 | to prevent accidental reuse of IVs. The builtin protection against reuse is |
| 77 | only effective for objects encrypted with the same Encrypt handle. If multiple |
| 78 | Encrypt handles are used to encrypt with the same combination of password and |
| 79 | salt, the encryption becomes susceptible to birthday attacks (bound = 2^32 due |
| 80 | to the 64-bit random iv). Thus the use of multiple handles is discouraged. |
| 81 | |
| 82 | |
| 83 | Command Line Utility |
| 84 | ------------------------------------------------------------------------------- |
| 85 | |
| 86 | ``crypto.py`` may be invoked as a script for decrypting, validating, and |
| 87 | splitting PDT encrypted files. Consult the usage message for details. |
| 88 | |
| 89 | Usage examples: |
| 90 | |
| 91 | Decrypt from stdin using the password ‘foo’: :: |
| 92 | |
| 93 | $ crypto.py process foo -i - -o - <some-file.tar.gz.pdtcrypt >some-file.tar.gz |
| 94 | |
| 95 | Output verbose information about the encrypted objects in the archive: :: |
| 96 | |
| 97 | $ crypto.py process foo -v -i some-file.tar.gz.pdtcrypt -o /dev/null |
| 98 | PDT: decrypt from some-file.tar.gz.pdtcrypt |
| 99 | PDT: decrypt to /dev/null |
| 100 | PDT: source: file some-file.tar.gz.pdtcrypt |
| 101 | PDT: sink: file /dev/null |
| 102 | PDT: 0 hdr |
| 103 | PDT: · version = 1 : 0100 |
| 104 | PDT: · paramversion = 1 : 0100 |
| 105 | PDT: · nacl : d270 b031 00d1 87e2 c946 610d 7b7f 7e5f |
| 106 | PDT: · iv : 02ee 3dd7 a963 1eb1 0100 0000 |
| 107 | PDT: · ctsize = 591 : 4f02 0000 0000 0000 |
| 108 | PDT: · tag : 5b2d 6d8b 8f82 4842 12fd 0b10 b6e3 369b |
| 109 | PDT: 64 decrypt obj no. 1, 591 B |
| 110 | PDT: · [64] 0% done, read block (591 B of 591 B remaining) |
| 111 | PDT: · decrypt ciphertext 591 B |
| 112 | PDT: · decrypt plaintext 591 B |
| 113 | PDT: 655 finalize |
| 114 | … |
| 115 | |
| 116 | Also, the mode *scrypt* allows deriving encryption keys. To calculate the |
| 117 | encryption key from the password ‘foo’ and the salt of the first object in a |
| 118 | PDT encrypted file: :: |
| 119 | |
| 120 | $ crypto.py scrypt foo -i some-file.pdtcrypt |
| 121 | {"paramversion": 1, "salt": "Cqzbk48e3peEjzWto8D0yA==", "key": "JH9EkMwaM4x9F5aim5gK/Q=="} |
| 122 | |
| 123 | The computed 16 byte key is given in base64 encoding in the value to |
| 124 | ``key`` and can be fed into Python’s ``base64.b64decode()`` to obtain the |
| 125 | corresponding binary representation. |
| 126 | |
| 127 | Note that in Scrypt hashing mode, no data integrity checks are being performed. |
| 128 | If the wrong password is given, a wrong key will be derived. Whether the password |
| 129 | was indeed correct can only be determined by decrypting. Note that since PDT |
| 130 | archives essentially consist of a stream of independent objects, the salt and |
| 131 | other parameters may change. Thus a key derived using above method from the |
| 132 | first object doesn’t necessarily apply to any of the subsequent objects. |
| 133 | |
| 134 | Future Developments |
| 135 | ------------------------------------------------------------------------------- |
| 136 | |
| 137 | As of 2020 with the format version 1, Deltatar encryption uses the AES-GCM mode |
| 138 | which requires meticulous bookkeeping of initialization vectors. A future |
| 139 | version could simplify this aspect of the encryption by switching to the more |
| 140 | recent AES-GCM-SIV mode (RFC 8452). |
| 141 | """ |
| 142 | |
| 143 | import base64 |
| 144 | import binascii |
| 145 | import bisect |
| 146 | import ctypes |
| 147 | import io |
| 148 | from functools import reduce, partial |
| 149 | import mmap |
| 150 | import os |
| 151 | import struct |
| 152 | import stat |
| 153 | import sys |
| 154 | import time |
| 155 | import types |
| 156 | import errno |
| 157 | try: |
| 158 | import enum34 |
| 159 | except ImportError as exn: |
| 160 | pass |
| 161 | |
| 162 | if __name__ == "__main__": ## Work around the import mechanism lest Python’s |
| 163 | pwd = os.getcwd() ## preference for local imports causes a cyclical |
| 164 | ## import (crypto → pylibscrypt → […] → ./tarfile → crypto). |
| 165 | sys.path = [ p for p in sys.path if p.find ("deltatar") < 0 ] |
| 166 | |
| 167 | import pylibscrypt |
| 168 | from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes |
| 169 | from cryptography.hazmat.backends import default_backend |
| 170 | import cryptography |
| 171 | |
| 172 | |
| 173 | __all__ = [ "hdr_make", "hdr_read", "hdr_fmt", "hdr_fmt_pretty" |
| 174 | , "scrypt_hashfile" |
| 175 | , "PDTCRYPT_HDR_SIZE", "AES_GCM_IV_CNT_DATA" |
| 176 | , "AES_GCM_IV_CNT_INFOFILE", "AES_GCM_IV_CNT_INDEX" |
| 177 | ] |
| 178 | |
| 179 | |
| 180 | ############################################################################### |
| 181 | ## exceptions |
| 182 | ############################################################################### |
| 183 | |
class EndOfFile(Exception):
    """
    Sentinel raised when a stream supplied for reading is exhausted.

    :ivar remainder: the value passed as *n*; stays at the class default of
                     zero when *n* is omitted.
    :ivar msg:       the value passed as *msg*; ``None`` when omitted.
    """
    # Class-level defaults; the constructor shadows them per instance.
    remainder = 0
    msg = 0

    def __init__(self, n=None, msg=None):
        # The initializer of the base class is not invoked here.
        self.msg = msg
        if n is None:
            return
        self.remainder = n
| 192 | |
| 193 | |
class InvalidParameter(Exception):
    """A supplied input is not valid for PDT encryption."""
| 197 | |
| 198 | |
class InvalidHeader(Exception):
    """The data could not be accepted as a valid header."""
| 202 | |
| 203 | |
class InvalidGCMTag(Exception):
    """
    Decryption produced a GCM tag that does not match the one stored in the
    object header.
    """
| 210 | |
| 211 | |
class InvalidIVFixedPart(Exception):
    """
    The fixed part of an IV is missing from the supplied list. Either the
    backup is corrupt or the current object is not part of it.
    """
| 218 | |
| 219 | |
class IVFixedPartError(Exception):
    """
    No unique IV fixed part could be created: the system RNG repeatedly
    returned the same byte sequence as the IV used last.
    """
| 226 | |
| 227 | |
class InvalidFileCounter(Exception):
    """
    Encryption caught an attempt to reuse one of the dedicated counters
    (info file, index file).
    """
| 234 | |
| 235 | |
class DuplicateIV(Exception):
    """
    The current IV equals one that already exists (same prefix and file
    counter) during encryption. Unrecoverable; points to tampering or a
    programmer error.
    """
| 243 | |
| 244 | |
class NonConsecutiveIV(Exception):
    """
    The IVs are not numbered consecutively. With strict IV checking this is
    a hard error, which rules out random access to the encrypted objects.
    """
| 251 | |
| 252 | |
class CiphertextTooLong(Exception):
    """
    More data was submitted for decryption than the ciphertext size given in
    the object header allows.
    """
| 259 | |
| 260 | |
class FormatError(Exception):
    """The header contains parameters that cannot be used."""
| 264 | |
| 265 | |
class DecryptionError(Exception):
    """Decrypting with ``crypto.py`` from the command line failed."""
| 269 | |
| 270 | |
class Unreachable(Exception):
    """
    Stand-in for __builtin_unreachable(); whenever this is thrown, the
    programmer made a mistake.
    """
| 277 | |
| 278 | |
class InternalError(Exception):
    """An error caused neither by bad user input nor by cryptography."""
| 282 | |
| 283 | |
| 284 | ############################################################################### |
| 285 | ## crypto layer version |
| 286 | ############################################################################### |
| 287 | |
# Parameter sets, looked up by parameter version. Every set names the KDF
# along with its settings ("kdf") and the cipher ("enc").
ENCRYPTION_PARAMETERS = {
    0: {
        "kdf": ("dummy", 16),
        "enc": "passthrough",
    },
    1: {
        "kdf": (
            "scrypt",
            {
                "dkLen": 16,
                "N": 1 << 16,
                "r": 8,
                "p": 1,
                "NaCl_LEN": 16,
            },
        ),
        "enc": "aes-gcm",
    },
}
| 300 | |
| 301 | # Mode zero is unencrypted and only provided for testing purposes. It must |
| 302 | # not be used unless the encryptor / decryptor are explicitly instructed to do so. |
| 303 | MIN_SECURE_PARAMETERS = 1 |
| 304 | |
| 305 | ############################################################################### |
| 306 | ## constants |
| 307 | ############################################################################### |
| 308 | |
PDTCRYPT_HDR_MAGIC = b"PDTCRYPT"

# Width of each header field in bytes; the trailing comment gives the running
# total up to and including that field.
PDTCRYPT_HDR_SIZE_MAGIC = 8           # 8
PDTCRYPT_HDR_SIZE_VERSION = 2         # 10
PDTCRYPT_HDR_SIZE_PARAMVERSION = 2    # 12
PDTCRYPT_HDR_SIZE_NACL = 16           # 28
PDTCRYPT_HDR_SIZE_IV = 12             # 40
PDTCRYPT_HDR_SIZE_CTSIZE = 8          # 48
PDTCRYPT_HDR_SIZE_TAG = 16            # 64 GCM auth tag

PDTCRYPT_HDR_SIZE = sum((
    PDTCRYPT_HDR_SIZE_MAGIC,
    PDTCRYPT_HDR_SIZE_VERSION,
    PDTCRYPT_HDR_SIZE_PARAMVERSION,
    PDTCRYPT_HDR_SIZE_NACL,
    PDTCRYPT_HDR_SIZE_IV,
    PDTCRYPT_HDR_SIZE_CTSIZE,
    PDTCRYPT_HDR_SIZE_TAG,
))  # = 64

# Offsets are computed once up front since Python performs no constant
# folding over names. Each one is the preceding offset plus the width of the
# preceding field.
HDR_OFF_VERSION = PDTCRYPT_HDR_SIZE_MAGIC
HDR_OFF_PARAMVERSION = HDR_OFF_VERSION + PDTCRYPT_HDR_SIZE_VERSION
HDR_OFF_NACL = HDR_OFF_PARAMVERSION + PDTCRYPT_HDR_SIZE_PARAMVERSION
HDR_OFF_IV = HDR_OFF_NACL + PDTCRYPT_HDR_SIZE_NACL
HDR_OFF_CTSIZE = HDR_OFF_IV + PDTCRYPT_HDR_SIZE_IV
HDR_OFF_TAG = HDR_OFF_CTSIZE + PDTCRYPT_HDR_SIZE_CTSIZE
| 331 | |
# ``struct`` format strings. The leading "<" selects little-endian byte order
# with standard sizes and no alignment padding.
FMT_UINT16_LE = "<H"
FMT_UINT64_LE = "<Q"
FMT_I2N_IV = "<" "8s" "L"  # 8 random bytes ‖ 32 bit counter
FMT_I2N_HDR = "".join((
    "<",    # little endian
    "8s",   # magic
    "H",    # version
    "H",    # paramversion
    "16s",  # sodium chloride
    "12s",  # iv
    "Q",    # size
    "16s",  # GCM tag
))
| 343 | |
| 344 | # aes+gcm |
# aes+gcm
AES_KEY_SIZE = 16      # b"0123456789abcdef"
AES_KEY_SIZE_B64 = 24  # b'MDEyMzQ1Njc4OWFiY2RlZg=='

# 2^39 - 2^8 b ≅ 64 GB.
# Source: NIST SP 800-38D section 5.2.1.1
# https://crypto.stackexchange.com/questions/31793/plain-text-size-limits-for-aes-gcm-mode-just-64gb
AES_GCM_MAX_SIZE = 2 ** 36 - 2 ** 5

PDTCRYPT_MAX_OBJ_SIZE_DEFAULT = 63 << 30  # 63 GB
PDTCRYPT_MAX_OBJ_SIZE = PDTCRYPT_MAX_OBJ_SIZE_DEFAULT

# Index and info files are written on the fly while encrypting, hence their
# counters have to be known in advance.
AES_GCM_IV_CNT_INFOFILE = 1  # constant
AES_GCM_IV_CNT_INDEX = AES_GCM_IV_CNT_INFOFILE + 1
AES_GCM_IV_CNT_DATA = AES_GCM_IV_CNT_INDEX + 1  # also for multivolume
AES_GCM_IV_CNT_MAX_DEFAULT = 0xFFFFFFFF
AES_GCM_IV_CNT_MAX = AES_GCM_IV_CNT_MAX_DEFAULT

# IV structure and generation
PDTCRYPT_IV_GEN_MAX_RETRIES = 10  # ×
PDTCRYPT_IV_FIXEDPART_SIZE = 8    # B
PDTCRYPT_IV_COUNTER_SIZE = 4      # B

# secret type: PW of string | KEY of char [16]
PDTCRYPT_SECRET_PW, PDTCRYPT_SECRET_KEY = 0, 1
| 371 | |
| 372 | ############################################################################### |
| 373 | ## header, trailer |
| 374 | ############################################################################### |
| 375 | # |
| 376 | # Interface: |
| 377 | # |
| 378 | # struct hdrinfo |
| 379 | # { version : u16 |
| 380 | # , paramversion : u16 |
| 381 | # , nacl : [u8; 16] |
| 382 | # , iv : [u8; 12] |
| 383 | # , ctsize : usize |
| 384 | # , tag : [u8; 16] } |
| 385 | # |
| 386 | # fn hdr_read (f : handle) -> hdrinfo; |
| 387 | # fn hdr_make (f : handle, h : hdrinfo) -> IOResult<usize>; |
| 388 | # fn hdr_fmt (h : hdrinfo) -> String; |
| 389 | # |
| 390 | |
def hdr_read(data):
    """
    Interpret a chunk of bytes as a header structure.

    :param data: the raw bytes, unpacked according to ``FMT_I2N_HDR``.
    :returns:    dict with the keys ``version``, ``paramversion``, ``nacl``,
                 ``iv``, ``ctsize`` and ``tag``. The magic is checked but not
                 part of the result.
    :raises InvalidHeader: if unpacking fails or the magic bytes differ from
                           ``PDTCRYPT_HDR_MAGIC``.
    """
    try:
        fields = struct.unpack(FMT_I2N_HDR, data)
    except Exception as exn:
        raise InvalidHeader("error unpacking header from [%r]: %s"
                            % (binascii.hexlify(data), str(exn)))

    magic = fields[0]
    if magic != PDTCRYPT_HDR_MAGIC:
        raise InvalidHeader("bad magic in header: expected [%s], got [%s]"
                            % (PDTCRYPT_HDR_MAGIC, magic))

    names = ("version", "paramversion", "nacl", "iv", "ctsize", "tag")
    return dict(zip(names, fields[1:]))
| 418 | |
| 419 | |
def hdr_read_stream(instr):
    """
    Read one header from *instr*, starting at its current position.

    :param instr: object with a ``read()`` method returning bytes.
    :returns:     the header structure produced by ``hdr_read``.
    :raises EndOfFile:     if the read yielded no data at all.
    :raises InvalidHeader: if fewer bytes than a full header were read, or
                           if the bytes could not be interpreted as a header.
    """
    data = instr.read(PDTCRYPT_HDR_SIZE)
    received = len(data)
    if received == PDTCRYPT_HDR_SIZE:
        return hdr_read(data)
    if received == 0:
        raise EndOfFile
    raise InvalidHeader("hdr_read_stream: expected %d B, received %d B"
                        % (PDTCRYPT_HDR_SIZE, received))
| 435 | |
| 436 | |
def hdr_from_params(version, paramversion, nacl, iv, ctsize, tag):
    """
    Pack the given values into a PDTCRYPT header.

    :type version:      int to fit uint16_t
    :type paramversion: int to fit uint16_t
    :type nacl:         bytes to fit uint8_t[16]
    :type iv:           bytes to fit uint8_t[12]
    :type ctsize:       int to fit uint64_t
    :type tag:          bytes to fit uint8_t[16]
    :returns: pair ``(True, header_bytes)`` on success, or
              ``(False, error_message)`` if packing failed.
    """
    packed = bytearray(PDTCRYPT_HDR_SIZE)
    values = (PDTCRYPT_HDR_MAGIC, version, paramversion, nacl, iv, ctsize,
              tag)

    try:
        struct.pack_into(FMT_I2N_HDR, packed, 0, *values)
    except Exception as exn:
        # Failures are reported through the return value, not raised.
        return False, "error assembling header: %s" % str(exn)
    else:
        return True, bytes(packed)
| 459 | |
| 460 | |
def hdr_make_dummy(s):
    """
    Build a header sized block of bytes that all carry one value derived
    from the string *s*. Serves to verify that we jumped back correctly to
    the actual position of the object header.

    :param s: string whose code points are summed up.
    :returns: ``PDTCRYPT_HDR_SIZE`` repetitions of the byte
              ``sum of code points modulo 0xFF``.
    """
    # Modulo 0xFF confines the fill value to the range 0–254.
    fill = sum(ord(ch) for ch in s) % 0xFF
    return bytes([fill]) * PDTCRYPT_HDR_SIZE
| 469 | |
| 470 | |
def hdr_make(hdr):
    """
    Pack a header structure (dict) by handing its fields on to
    ``hdr_from_params``.

    Keys that are absent from *hdr* are passed as ``None``.

    :returns: whatever ``hdr_from_params`` returns.
    """
    names = ("version", "paramversion", "nacl", "iv", "ctsize", "tag")
    return hdr_from_params(**{name: hdr.get(name) for name in names})
| 479 | |
| 480 | |
HDR_FMT = "I2n_header { version: %d, paramversion: %d, nacl: %s[%d]," \
          " iv: %s[%d], ctsize: %d, tag: %s[%d] }"


def hdr_fmt(h):
    """
    Format a header structure into a single line of readable output.

    Binary fields (``nacl``, ``iv``, ``tag``) are rendered as plain
    hexadecimal text followed by their length in brackets.

    :param h: header structure with the keys ``version``, ``paramversion``,
              ``nacl``, ``iv``, ``ctsize`` and ``tag``.
    :rtype:   str
    """
    def hexstr(raw):
        # hexlify() yields bytes; without decoding, ``%s`` would emit the
        # ``b'…'`` repr into the formatted text.
        return binascii.hexlify(raw).decode("ascii")

    nacl, iv, tag = h["nacl"], h["iv"], h["tag"]
    return HDR_FMT % (h["version"], h["paramversion"],
                      hexstr(nacl), len(nacl),
                      hexstr(iv), len(iv),
                      h["ctsize"],
                      hexstr(tag), len(tag))
| 491 | |
| 492 | |
def hex_spaced_of_bytes(b):
    """
    Format a bytes object hexdump style: lowercase hex digits in groups of
    two bytes, the groups separated by single spaces.

    With an odd number of bytes, the last group holds only one byte. Empty
    input gives the empty string.

    :param b: bytes-like object supporting ``len()`` and slicing.
    :rtype:   str
    """
    groups = (bytes(b[i:i + 2]) for i in range(0, len(b), 2))
    return " ".join(binascii.hexlify(g).decode("ascii") for g in groups)
| 498 | |
| 499 | |
def hdr_iv_counter(h):
    """
    Return the variable part (the counter) of the IV stored in header *h*.

    The IV under ``h["iv"]`` is unpacked according to ``FMT_I2N_IV``; the
    counter is its second component.
    """
    return struct.unpack(FMT_I2N_IV, h["iv"])[1]
| 504 | |
| 505 | |
def hdr_iv_fixed(h):
    """
    Return the fixed part of the IV stored in header *h*.

    The IV under ``h["iv"]`` is unpacked according to ``FMT_I2N_IV``; the
    fixed part is its first component.
    """
    return struct.unpack(FMT_I2N_IV, h["iv"])[0]
| 510 | |
| 511 | |
| 512 | hdr_dump = hex_spaced_of_bytes |
| 513 | |
| 514 | |
| 515 | HDR_FMT_PRETTY = \ |
| 516 | """version = %-4d : %s |
| 517 | paramversion = %-4d : %s |
| 518 | nacl : %s |
| 519 | iv : %s |
| 520 | ctsize = %-20d : %s |
| 521 | tag : %s |
| 522 | """ |
| 523 | |
def hdr_fmt_pretty(h):
    """
    Render a header structure as multiple lines that show each field next to
    its raw representation. (The implicit “PDTCRYPT” magic bytes preceding
    every header are left out.)

    :param h: header structure with the keys ``version``, ``paramversion``,
              ``nacl``, ``iv``, ``ctsize`` and ``tag``.
    :rtype:   str
    """
    def raw(fmt, value):
        # Integer fields are shown the way they are packed.
        return hex_spaced_of_bytes(struct.pack(fmt, value))

    fields = (
        h["version"], raw(FMT_UINT16_LE, h["version"]),
        h["paramversion"], raw(FMT_UINT16_LE, h["paramversion"]),
        hex_spaced_of_bytes(h["nacl"]),
        hex_spaced_of_bytes(h["iv"]),
        h["ctsize"], raw(FMT_UINT64_LE, h["ctsize"]),
        hex_spaced_of_bytes(h["tag"]),
    )
    return HDR_FMT_PRETTY % fields
| 540 | |
| 541 | IV_FMT = "((f %s) (c %d))" |
| 542 | |
def iv_fmt(iv):
    """
    Render both components of an IV in readable form: the fixed part as
    hexadecimal text, the counter as a decimal number.

    :param iv: packed IV, unpacked according to ``FMT_I2N_IV``.
    :rtype:    str
    """
    prefix, counter = struct.unpack(FMT_I2N_IV, iv)
    prefix_hex = binascii.hexlify(prefix).decode()
    return IV_FMT % (prefix_hex, counter)
| 547 | |
| 548 | |
| 549 | ############################################################################### |
| 550 | ## restoration |
| 551 | ############################################################################### |
| 552 | |
class Location(object):
    """
    Plain record with the two attributes ``n`` and ``offset``, both of which
    default to zero.
    """
    n = 0
    offset = 0
| 556 | |
def restore_loc_fmt(loc):
    """
    Format the attributes ``n`` and ``offset`` of *loc* as
    ``"<n> off:<offset>"``.
    """
    fields = (loc.n, loc.offset)
    return "%d off:%d" % fields
| 560 | |
def locate_hdr_candidates(fd):
    """
    Walk over instances of the magic string in the payload, collecting their
    positions. If the offset of the first found instance is not zero, the
    file begins with leading garbage. Used by disaster recovery.

    Overlapping occurrences are reported too, since the search resumes one
    byte after each hit.

    :param fd: file descriptor of the file to scan.
    :return:   The list of offsets in the file; empty if the file is empty
               or does not contain the magic string.
    """
    # mmap() refuses to map a file of zero length; such a file cannot hold
    # any candidates anyway.
    if os.fstat(fd).st_size == 0:
        return []

    cands = []
    mm = mmap.mmap(fd, 0, mmap.MAP_SHARED, mmap.PROT_READ)
    try:
        pos = mm.find(PDTCRYPT_HDR_MAGIC)
        while pos != -1:
            cands.append(pos)
            pos = mm.find(PDTCRYPT_HDR_MAGIC, pos + 1)
    finally:
        # Release the mapping even if the search is interrupted.
        mm.close()

    return cands
| 581 | |
| 582 | |
# Verdicts about header candidates.
HDR_CAND_GOOD = 0   # header marks begin of valid object
HDR_CAND_FISHY = 1  # inconclusive (tag mismatch, obj overlap etc.)
HDR_CAND_JUNK = 2   # not a header / object unreadable

# Display name of each verdict.
HDR_VERDICT_NAME = {
    HDR_CAND_GOOD: "valid",
    HDR_CAND_FISHY: "fishy",
    HDR_CAND_JUNK: "junk",
}


def verdict_fmt(vdt):
    """
    Return the display name of the verdict *vdt*.

    :raises KeyError: if *vdt* is not one of the ``HDR_CAND_*`` values.
    """
    name = HDR_VERDICT_NAME[vdt]
    return name
| 596 | |
| 597 | |
def inspect_hdr(fd, off):
    """
    Try to parse a header located at position *off* in the descriptor *fd*.

    :returns: pair of verdict and header: ``(HDR_CAND_JUNK, None)`` if no
              header could be read at *off*; ``(HDR_CAND_FISHY, hdr)`` if
              the object announced by the header extends past the end of
              the file, with ``hdr["ctsize"]`` cut down to the bytes
              actually available; ``(HDR_CAND_GOOD, hdr)`` otherwise.
    """
    def report(fmt, *args):
        # Messages are only assembled and emitted in verbose mode.
        if PDTCRYPT_VERBOSE is True:
            noise(fmt % args)

    os.lseek(fd, off, os.SEEK_SET)
    if os.lseek(fd, 0, os.SEEK_CUR) != off:
        report("PDT: %d → dismissed (lseek() past EOF)", off)
        return HDR_CAND_JUNK, None

    raw = os.read(fd, PDTCRYPT_HDR_SIZE)
    if len(raw) != PDTCRYPT_HDR_SIZE:
        report("PDT: %d → dismissed (EOF inside header)", off)
        return HDR_CAND_JUNK, None

    try:
        hdr = hdr_read(raw)
    except InvalidHeader as exn:
        report("PDT: %d → dismissed (invalid: [%s])", off, str(exn))
        return HDR_CAND_JUNK, None

    # Extent of the ciphertext that the header announces.
    first = off + PDTCRYPT_HDR_SIZE
    last = first + hdr["ctsize"]

    eof = os.lseek(fd, 0, os.SEEK_END)
    if eof >= last:
        return HDR_CAND_GOOD, hdr

    report("PDT: %d → EOF inside object (%d≤%d≤%d); adjusting size to %d",
           off, first, eof, last, (eof - first))
    # try reading up to the end
    hdr["ctsize"] = eof - first
    return HDR_CAND_FISHY, hdr
| 639 | |
| 640 | |
def try_decrypt(ifd, off, hdr, secret, ofd=-1):
    """
    Attempt to decrypt the object in the (seekable) descriptor *ifd* starting
    at *off* using the metadata in *hdr* and *secret*. An output fd can be
    specified with *ofd*; if it is *-1* – the default –, the decrypted payload
    will be discarded.

    Always creates a fresh decryptor, so validation steps across objects
    don’t apply.

    Errors during GCM tag validation are reported but otherwise ignored. Used
    by disaster recovery.

    :param secret: pair of secret type (``PDTCRYPT_SECRET_PW`` or
                   ``PDTCRYPT_SECRET_KEY``) and the matching value.
    :returns: number of ciphertext bytes actually read from *ifd*; this is
              less than ``hdr["ctsize"]`` if the input ended prematurely.
    :raises RuntimeError: if the secret type is not recognized.

    Any other exception raised while reading or decrypting is reported and
    then re-raised.
    """
    ctleft = hdr["ctsize"]
    pos = off

    ks = secret[0]
    if ks == PDTCRYPT_SECRET_PW:
        decr = Decrypt(password=secret[1])
    elif ks == PDTCRYPT_SECRET_KEY:
        decr = Decrypt(key=secret[1])
    else:
        raise RuntimeError("invalid secret type %r" % (ks,))

    decr.next(hdr)

    try:
        os.lseek(ifd, pos, os.SEEK_SET)
        while ctleft > 0:
            cnk = os.read(ifd, min(ctleft, PDTCRYPT_BLOCKSIZE))
            if not cnk:
                # Premature EOF: stop instead of pretending the remaining
                # bytes were read.
                break
            # Account for what was actually read; os.read() may return
            # fewer bytes than requested.
            ctleft -= len(cnk)
            pos += len(cnk)
            pt = decr.process(cnk)
            if ofd != -1:
                os.write(ofd, pt)

        try:
            pt = decr.done()
        except InvalidGCMTag:
            noise("PDT: GCM tag mismatch for object %d–%d"
                  % (off, off + hdr["ctsize"]))
            # Nothing new was produced; the plaintext of the last chunk has
            # been written already and must not be emitted a second time.
            pt = b""
        if len(pt) > 0 and ofd != -1:
            os.write(ofd, pt)

    except Exception as exn:
        noise("PDT: error decrypting object %d–%d@%d, %d B remaining [%s]"
              % (off, off + hdr["ctsize"], pos, ctleft, exn))
        raise

    return pos - off
| 692 | |
| 693 | |
def readable_objects_offsets(ifd, secret, cands):
    """
    Filter a list of candidate offsets down to those at which an actual
    readable PDTCRYPT object starts.

    A candidate is kept if its header is not classified as junk by
    ``inspect_hdr`` and ``try_decrypt`` reports having processed exactly the
    ciphertext size given in that header.

    :param ifd:    descriptor of the file the candidates refer to.
    :param secret: secret as accepted by ``try_decrypt``.
    :param cands:  iterable of candidate header offsets.
    :returns:      list of the header offsets that were kept.
    """
    good = []

    for cand in cands:
        vdt, hdr = inspect_hdr(ifd, cand)
        if vdt not in (HDR_CAND_GOOD, HDR_CAND_FISHY):
            continue  # ignore unreadable ones
        ctsize = hdr["ctsize"]
        payload = cand + PDTCRYPT_HDR_SIZE
        if try_decrypt(ifd, payload, hdr, secret) == ctsize:
            good.append((cand, payload + ctsize))

    # The result is not used here; the call is kept as it was.
    find_overlaps(good)

    return [start for start, _end in good]
| 715 | |
| 716 | |
def reconstruct_offsets(fname, secret):
    """
    Open the file *fname* read-only, collect the header candidates in it and
    return the offsets of those that ``readable_objects_offsets`` accepts.

    The descriptor is closed again in any case.
    """
    ifd = os.open(fname, os.O_RDONLY)

    try:
        return readable_objects_offsets(ifd, secret,
                                        locate_hdr_candidates(ifd))
    finally:
        os.close(ifd)
| 725 | |
| 726 | |
| 727 | ############################################################################### |
| 728 | ## helpers |
| 729 | ############################################################################### |
| 730 | |
def make_secret(password=None, key=None):
    """
    Safely build a “secret” value out of either a key or a password.

    The key is accepted as bytes or string in one of three forms, told apart
    by length: raw (``AES_KEY_SIZE``), hexadecimal (twice that) or base64
    (``AES_KEY_SIZE_B64``). The password is accepted as string or as UTF-8
    encoded bytes.

    If a key is given, the password is not looked at, even when the key
    turns out to be unusable. It is not checked whether the key was derived
    from the password.

    :returns: pair ``(PDTCRYPT_SECRET_KEY, key_bytes)`` or
              ``(PDTCRYPT_SECRET_PW, password_str)`` if the inputs were
              acceptable | None otherwise.
    """
    if key is not None:
        raw = key.encode("utf-8") if isinstance(key, str) else key
        if not isinstance(raw, bytes):
            return None

        size = len(raw)
        if size == AES_KEY_SIZE:
            return (PDTCRYPT_SECRET_KEY, raw)
        if size == AES_KEY_SIZE * 2:
            try:
                return (PDTCRYPT_SECRET_KEY, binascii.unhexlify(raw))
            except binascii.Error:  # garbage in string
                pass
        if size == AES_KEY_SIZE_B64:
            try:
                decoded = base64.b64decode(raw)
            except binascii.Error:  # “incorrect padding”
                return None
            # The base64 processor is very tolerant and puts up with
            # arbitrary leading and trailing data, so the length of the
            # result has to be verified.
            if len(decoded) == AES_KEY_SIZE:
                return (PDTCRYPT_SECRET_KEY, decoded)
        return None

    if password is None:
        return None
    if isinstance(password, str):
        return (PDTCRYPT_SECRET_PW, password)
    if isinstance(password, bytes):
        try:
            return (PDTCRYPT_SECRET_PW, password.decode("utf-8"))
        except UnicodeDecodeError:
            return None
    return None
| 776 | |
| 777 | |
| 778 | ############################################################################### |
| 779 | ## passthrough / null encryption |
| 780 | ############################################################################### |
| 781 | |
class PassthroughCipher(object):
    """
    Null cipher: data passes through unchanged and the tag consists of
    sixteen zero bytes.
    """

    # Identical to struct.pack ("<QQ", 0, 0).
    tag = b"\x00" * 16

    def __init__(self):
        pass

    def update(self, b):
        """Return *b* as is."""
        return b

    def finalize(self):
        """Return the empty bytes object."""
        return b""

    def finalize_with_tag(self, _):
        """Disregard the argument and return the empty bytes object."""
        return b""
| 793 | |
| 794 | ############################################################################### |
| 795 | ## convenience wrapper |
| 796 | ############################################################################### |
| 797 | |
| 798 | |
def kdf_dummy(klen, password, _nacl):
    """
    Fake KDF for testing purposes that is called when parameter version zero
    is encountered.

    The “key” is the password, repeated and truncated to *klen* bytes. A
    string password is encoded first so that the repetition count is based
    on its length in bytes, not in characters.

    :param klen:     length of the key to produce, in bytes.
    :param password: bytes or str; must not be empty.
    :param _nacl:    ignored.
    :returns:        pair of the key and an empty bytes object in place of
                     the salt.
    """
    if isinstance(password, bytes) is False:
        password = password.encode()
    q, r = divmod(klen, len(password))
    return password * q + password[:r], b""
| 808 | |
| 809 | |
| 810 | SCRYPT_KEY_MEMO = { } # static because needed for both the info file and the archive |
| 811 | |
| 812 | |
def kdf_scrypt(params, password, nacl):
    """
    Wrapper for the Scrypt KDF, corresponds to parameter version one.

    Results are memoized in ``SCRYPT_KEY_MEMO`` under the complete set of
    inputs so that the derivation does not have to be repeated when several
    encryption contexts are spawned.

    :param params:   dict with the entries ``N``, ``r``, ``p``, ``dkLen``
                     and ``NaCl_LEN``.
    :param password: passed on to ``pylibscrypt.scrypt``.
    :param nacl:     the salt; if ``None``, ``NaCl_LEN`` random bytes are
                     drawn from ``os.urandom``.
    :returns:        pair of the derived key and the salt that was used.
    """
    N, r, p, dkLen = (params[name] for name in ("N", "r", "p", "dkLen"))

    if nacl is None:
        nacl = os.urandom(params["NaCl_LEN"])

    memo_key = (password, nacl, N, r, p, dkLen)
    try:
        derived = SCRYPT_KEY_MEMO[memo_key]
    except KeyError:
        derived = pylibscrypt.scrypt(password, nacl, N, r, p, dkLen)
        SCRYPT_KEY_MEMO[memo_key] = derived
    return derived, nacl
| 833 | |
| 834 | |
def kdf_by_version(paramversion=None, defs=None):
    """
    Choose the KDF handler that matches the parameter version or, failing
    that, the definition set passed in *defs*.

    If *paramversion* is given, the definitions are looked up in
    ``ENCRYPTION_PARAMETERS`` and *defs* is disregarded.

    :returns: the handler with the KDF parameters already bound, i. e. a
              callable taking the password and the salt.
    :raises InvalidParameter: if no definition set is available.
    :raises ValueError:       if the definitions name an unknown KDF.
    """
    if paramversion is not None:
        defs = ENCRYPTION_PARAMETERS.get(paramversion)
    if defs is None:
        raise InvalidParameter("no encryption parameters for version %r"
                               % paramversion)

    kdf, params = defs["kdf"]
    handlers = (("scrypt", kdf_scrypt), ("dummy", kdf_dummy))
    for name, handler in handlers:
        if kdf == name:
            return partial(handler, params)

    raise ValueError("key derivation method %r unknown" % kdf)
| 854 | |
| 855 | |
| 856 | ############################################################################### |
| 857 | ## SCRYPT hashing |
| 858 | ############################################################################### |
| 859 | |
def scrypt_hashsource (pw, ins):
    """
    Calculate the SCRYPT hash from the password and the information contained
    in the first header found in ``ins``.

    This does not validate whether the first object is encrypted correctly.

    All failures are reported by raising. A diagnostic line is written with
    ``noise ()`` first, but no status code is ever returned, so callers can
    always unpack the result.

    :param pw:  password; a ``str`` is encoded to bytes
    :type  pw:  str or bytes
    :param ins: input opened in binary mode
    :type  ins: io.BufferedReader or io.FileIO
    :returns:   tuple ``(hash, salt, header version, parameter version)``
    :raises InvalidParameter: wrong argument type, unknown parameter version
                              or a parameter version not based on Scrypt
    :raises EndOfFile: the input ended while reading the first header
    """
    if isinstance (pw, str) is True:
        pw = str.encode (pw)
    elif isinstance (pw, bytes) is False:
        raise InvalidParameter ("password must be a string, not %s"
                                % type (pw))
    if isinstance (ins, (io.BufferedReader, io.FileIO)) is False:
        raise InvalidParameter ("file to hash must be opened in “binary” mode")

    try:
        hdr = hdr_read_stream (ins)
    except EndOfFile:
        noise ("PDT: malformed input: end of file reading first object header")
        noise ("PDT:")
        # re-raise: there is no header to compute anything from
        raise

    nacl = hdr ["nacl"]
    pver = hdr ["paramversion"]
    if PDTCRYPT_VERBOSE is True:
        noise ("PDT: salt of first object : %s" % binascii.hexlify (nacl))
        noise ("PDT: parameter version of archive : %d" % pver)

    defs = ENCRYPTION_PARAMETERS.get (pver, None)
    if defs is None:
        # Without this check the subscript below fails with a TypeError.
        noise ("PDT: object has unknown parameter version %d" % pver)
        raise InvalidParameter ("no encryption parameters for version %r"
                                % pver)

    kdfname, _params = defs ["kdf"]
    if kdfname != "scrypt":
        noise ("PDT: input is not an SCRYPT archive")
        noise ("")
        raise InvalidParameter ("input is not an SCRYPT archive: parameter "
                                "version %r uses key derivation method %r"
                                % (pver, kdfname))

    kdf = kdf_by_version (None, defs)
    hsh, _void = kdf (pw, nacl)

    return hsh, nacl, hdr ["version"], pver
| 903 | |
| 904 | |
def scrypt_hashfile(pw, fname):
    """
    Compute the SCRYPT hash for the password and the first header of a file.

    The file is opened as a ``PDTCRYPT_SOURCE`` stream; a false ``fname``
    (``None``, empty string) is replaced with ``"-"``. The header is read
    only at offset zero.

    :returns: only the hash; salt, header version and parameter version
              reported by ``scrypt_hashsource`` are discarded
    """
    source = fname or "-"
    with deptdcrypt_mk_stream(PDTCRYPT_SOURCE, source) as ins:
        digest, _salt, _version, _pver = scrypt_hashsource(pw, ins)
    return digest
| 914 | |
| 915 | |
| 916 | ############################################################################### |
| 917 | ## AES-GCM context |
| 918 | ############################################################################### |
| 919 | |
class Crypto (object):
    """
    Encryption context to remain alive throughout an entire tarfile pass.

    Base class of ``Encrypt`` and ``Decrypt``. It keeps the state common to
    both directions: key material, the object counter, IV bookkeeping and the
    transfer statistics. The cipher object itself (``enc``) is installed by
    the subclasses.
    """
    enc = None # wrapped encryptor / decryptor used by ``process``
    nacl = None # salt belonging to ``key``
    key = None # symmetric key, passed in or derived from ``password``
    cnt = None # file counter (uint32_t != 0)
    iv = None # current IV
    fixed = None # accu for 64 bit fixed parts of IV
    used_ivs = None # tracks IVs
    strict_ivs = False # if True, panic on duplicate or non-consecutive object IV
    password = None # password as bytes, see ``set_parameters``
    paramversion = None # parameter version currently in effect
    insecure = False # allow plaintext parameters
    # Class level fallback only. ``__init__`` installs a separate dictionary
    # on every instance; mutating this shared one would make all contexts
    # add up each other's numbers.
    stats = { "in" : 0
            , "out" : 0
            , "obj" : 0 }

    ctsize = -1 # ciphertext bytes of the current object
    ptsize = -1 # plaintext bytes of the current object
    info_counter_used = False # reserved info file counter handed out
    index_counter_used = False # reserved index counter handed out

    def __init__ (self, *al, **akv):
        """
        Create the per-instance containers, then pass all arguments on to
        ``set_parameters``.
        """
        self.used_ivs = set ()
        # per-instance counters, cf. the note on the class attribute
        self.stats = { "in" : 0, "out" : 0, "obj" : 0 }
        self.set_parameters (*al, **akv)


    def next_fixed (self):
        # NOP for decryption
        pass


    def set_object_counter (self, cnt=None):
        """
        Safely set the internal counter of encrypted objects. Numerous
        constraints apply:

        The same counter may not be reused in combination with one IV fixed
        part. This is validated elsewhere in the IV handling.

        Counter zero is invalid. The first two counters are reserved for
        metadata. The implementation does not allow for splitting metadata
        files over multiple encrypted objects. (This would be possible by
        assigning new fixed parts.) Thus in a Deltatar backup there is at most
        one object with a counter value of one and two. On creation of a
        context, the initial counter may be chosen. The globals
        ``AES_GCM_IV_CNT_INFOFILE`` and ``AES_GCM_IV_CNT_INDEX`` can be used to
        request one of the reserved values. If one of these values has been
        used, any further attempt of setting the counter to that value will
        be rejected with an ``InvalidFileCounter`` exception.

        Out of bounds values (i. e. below one and more than the maximum of 2³²)
        cause an ``InvalidParameter`` exception to be thrown.

        :param cnt: counter to install; ``None`` selects
                    ``AES_GCM_IV_CNT_DATA``. The value
                    ``AES_GCM_IV_CNT_MAX + 1`` is accepted and wraps around
                    to ``AES_GCM_IV_CNT_DATA`` after requesting a new IV
                    fixed part through ``next_fixed``.
        :raises InvalidParameter:   counter out of bounds
        :raises InvalidFileCounter: reserved counter requested twice
        """
        if cnt is None:
            self.cnt = AES_GCM_IV_CNT_DATA
            return
        # ``< 1`` rather than ``== 0``: negative values are out of bounds too
        if cnt < 1 or cnt > AES_GCM_IV_CNT_MAX + 1:
            raise InvalidParameter ("invalid counter value %d requested: "
                                    "acceptable values are from 1 to %d"
                                    % (cnt, AES_GCM_IV_CNT_MAX))
        if cnt == AES_GCM_IV_CNT_INFOFILE:
            if self.info_counter_used is True:
                raise InvalidFileCounter ("attempted to reuse info file "
                                          "counter %d: must be unique" % cnt)
            self.info_counter_used = True
        elif cnt == AES_GCM_IV_CNT_INDEX:
            if self.index_counter_used is True:
                raise InvalidFileCounter ("attempted to reuse index file "
                                          "counter %d: must be unique" % cnt)
            self.index_counter_used = True
        if cnt <= AES_GCM_IV_CNT_MAX:
            self.cnt = cnt
            return
        # cnt == AES_GCM_IV_CNT_MAX + 1 → wrap
        self.cnt = AES_GCM_IV_CNT_DATA
        self.next_fixed ()


    def set_parameters (self, password=None, key=None, paramversion=None,
                        nacl=None, counter=None, strict_ivs=False,
                        insecure=False):
        """
        Configure the internal state of a crypto context. Not intended for
        external use.

        A parameter version indicating passthrough (plaintext) mode is rejected
        with an ``InvalidParameter`` unless ``insecure`` is set.

        Every call requests a new IV fixed part (``next_fixed``) and sets the
        object counter from ``counter``. If ``key`` is given it is stored
        together with ``nacl`` and ``password`` is not looked at. Otherwise
        the key is derived from ``password``, unless neither ``paramversion``
        nor ``nacl`` is known yet.
        """
        self.next_fixed ()
        self.set_object_counter (counter)
        self.strict_ivs = strict_ivs

        self.insecure = insecure

        if paramversion is not None:
            if self.insecure is False \
                    and paramversion < MIN_SECURE_PARAMETERS:
                raise InvalidParameter \
                    ("set_parameters: requested parameter version %d but "
                     "plaintext encryption disallowed in secure context!"
                     % paramversion)
            self.paramversion = paramversion

        if key is not None:
            self.key, self.nacl = key, nacl
            return

        if password is not None:
            if isinstance (password, bytes) is False:
                password = str.encode (password)
            self.password = password
            if paramversion is None and nacl is None:
                # postpone key setup until first header is available
                return
            kdf = kdf_by_version (paramversion)
            if kdf is not None:
                self.key, self.nacl = kdf (password, nacl)


    def process (self, buf):
        """
        Encrypt / decrypt a buffer. Invokes the ``.update()`` method on the
        wrapped encryptor or decryptor, respectively.

        The Cryptography exception ``AlreadyFinalized`` is translated to an
        ``InternalError`` at this point. It may occur in sound code when the GC
        closes an encrypting stream after an error. Everywhere else it must be
        treated as a bug.

        :returns: the output of ``.update()``
        :raises RuntimeError: no encryptor / decryptor is active
        """
        if self.enc is None:
            raise RuntimeError ("process: context not initialized")
        self.stats ["in"] += len (buf)
        try:
            out = self.enc.update (buf)
        except cryptography.exceptions.AlreadyFinalized as exn:
            raise InternalError (exn) from exn
        self.stats ["out"] += len (out)
        return out


    def next (self, password, paramversion, nacl):
        """
        Prepare for encrypting another object: Reset the data counters and
        change the configuration in case one of the variable parameters differs
        from the last object.
        """
        self.ctsize = 0
        self.ptsize = 0
        self.stats ["obj"] += 1

        if ( self.paramversion != paramversion
            or self.password != password
            or self.nacl != nacl):
            self.set_parameters (password=password, paramversion=paramversion,
                                 nacl=nacl, strict_ivs=self.strict_ivs,
                                 insecure=self.insecure)


    def counters (self):
        """
        Access the data counters.

        :returns: tuple of the number of objects, input bytes and output bytes
        """
        return self.stats ["obj"], self.stats ["in"], self.stats ["out"]


    def drop (self):
        """
        Clear the current context regardless of its finalization state. The
        next operation must be ``.next()``.
        """
        self.enc = None


    def get_used_ivs (self):
        """
        Get the set of IVs that were used so far during the lifetime of
        this context. Useful to check for IV reuse if multiple encryption
        contexts were used independently.
        """
        return self.used_ivs


    def reset_last_iv (self):
        """
        Implemented only for decryptor; no-op otherwise.
        """
        pass
| 1110 | |
| 1111 | |
class Encrypt (Crypto):
    """
    Encrypting context. For every object: ``.next()`` sets up the IV and the
    encryptor, ``.process()`` encrypts the payload and ``.done()`` finalizes
    the object and produces its header.
    """

    lastinfo = None # (filename, dummy header) of the object in progress
    version = None # format version written to the headers
    paramenc = None # name of the encryption mode of ``paramversion``

    def __init__ (self, version, paramversion, password=None, key=None, nacl=None,
                  counter=AES_GCM_IV_CNT_DATA, strict_ivs=False, insecure=False):
        """
        The ctor will throw immediately if one of the parameters does not conform
        to our expectations.

        :type      version: int to fit uint16_t
        :type paramversion: int to fit uint16_t
        :param    password: mutually exclusive with ``key``
        :type     password: str
        :param         key: mutually exclusive with ``password``; requires
                            ``nacl``
        :type          key: bytes
        :type         nacl: bytes
        :type      counter: initial object counter the values
                            ``AES_GCM_IV_CNT_INFOFILE`` and
                            ``AES_GCM_IV_CNT_INDEX`` are unique in each backup set
                            and cannot be reused even with different fixed parts.
        :type   strict_ivs: bool
        :param  strict_ivs: Enable paranoid tracking of IVs.
        :type     insecure: bool
        :param    insecure: whether to permit passthrough mode
        :raises InvalidParameter: an argument has the wrong type or value, or
                                  ``paramversion`` is not a known parameter
                                  version

        *Security considerations*: The ``class Encrypt`` handle guarantees that
        all random parts (first eight bytes) of the IVs used for encrypting
        objects are unique. This guarantee does *not* apply across handles if
        multiple handles are used with the same combination of password and
        salt. Thus, use of multiple handles with the same combination of password
        and salt is subject to birthday attacks with a bound of 2^32. To avoid
        collisions, the application should keep the number of handles as low
        as possible and check for reuse by comparing the set of IVs used of all
        handles that were created (accessible using the ``get_used_ivs`` method).
        """
        if         password is     None and key is     None \
                or password is not None and key is not None :
            raise InvalidParameter ("__init__: need either key or password")

        if key is not None:
            if isinstance (key, bytes) is False:
                raise InvalidParameter ("__init__: key must be provided as "
                                        "bytes, not %s" % type (key))
            if nacl is None:
                raise InvalidParameter ("__init__: salt must be provided along "
                                        "with encryption key")
        else: # password, no key
            if isinstance (password, str) is False:
                raise InvalidParameter ("__init__: password must be a string, not %s"
                                        % type (password))
            if len (password) == 0:
                raise InvalidParameter ("__init__: supplied empty password but not "
                                        "permitted for PDT encrypted files")
        # version
        if isinstance (version, int) is False:
            raise InvalidParameter ("__init__: version number must be an "
                                    "integer, not %s" % type (version))
        if version < 0:
            raise InvalidParameter ("__init__: version number must be a "
                                    "nonnegative integer, not %d" % version)
        # paramversion
        if isinstance (paramversion, int) is False:
            raise InvalidParameter ("__init__: crypto parameter version number "
                                    "must be an integer, not %s"
                                    % type (paramversion))
        if paramversion < 0:
            raise InvalidParameter ("__init__: crypto parameter version number "
                                    "must be a nonnegative integer, not %d"
                                    % paramversion)
        # salt
        if nacl is not None:
            if isinstance (nacl, bytes) is False:
                raise InvalidParameter ("__init__: salt given, but of type %s "
                                        "instead of bytes" % type (nacl))
            # salt length would depend on the actual encryption so it can’t be
            # validated at this point

        # Look the definition set up explicitly: subscripting the result of
        # ``.get ()`` unchecked fails with a TypeError for unknown versions.
        defs = ENCRYPTION_PARAMETERS.get (paramversion)
        if defs is None:
            raise InvalidParameter ("__init__: no encryption parameters for "
                                    "version %r" % paramversion)

        # must exist before the base ctor runs: it calls ``next_fixed``
        self.fixed = [ ]
        self.version = version
        self.paramenc = defs ["enc"]

        super().__init__ (password, key, paramversion, nacl, counter=counter,
                          strict_ivs=strict_ivs, insecure=insecure)


    def next_fixed (self, retries=PDTCRYPT_IV_GEN_MAX_RETRIES):
        """
        Generate the next IV fixed part by reading eight bytes from
        ``/dev/urandom``. The buffer so obtained is tested against the fixed
        parts used so far to prevent accidental reuse of IVs. After a
        configurable number of attempts to create a unique fixed part, it will
        refuse to continue with an ``IVFixedPartError``. This is unlikely to
        ever happen on a normal system but may detect an issue with the random
        generator.

        The list of fixed parts that were used by the context at hand can be
        accessed through the ``.fixed`` list. Its last element is the fixed
        part currently in use.
        """
        i = 0
        while i < retries:
            fp = os.urandom (PDTCRYPT_IV_FIXEDPART_SIZE)
            if fp not in self.fixed:
                self.fixed.append (fp)
                return
            i += 1
        raise IVFixedPartError ("error obtaining a unique IV fixed part from "
                                "/dev/urandom; giving up after %d tries" % i)


    def iv_make (self):
        """
        Construct a 12-bytes IV from the current fixed part and the object
        counter.
        """
        return struct.pack(FMT_I2N_IV, self.fixed [-1], self.cnt)


    def next (self, filename=None, counter=None):
        """
        Prepare for encrypting the next incoming object. Update the counter
        and put together the IV, possibly changing prefixes. Then create the
        new encryptor.

        The argument ``counter`` can be used to specify a file counter for this
        object. Unless it is one of the reserved values, the counter of
        subsequent objects will be computed from this one.

        If this is the first object in a series, ``filename`` is required,
        otherwise it is reused if not present. The value is used to derive a
        header sized placeholder to use until after encryption when all the
        inputs to construct the final header are available. This is then
        matched in ``.done()`` against the value found at the position of the
        header. The motivation for this extra check is primarily to assist
        format debugging: It makes stray headers easy to spot in malformed
        PDTCRYPT files.

        :returns: the placeholder header for the object
        :raises InvalidParameter: bad argument type, missing file name or
                                  unknown encryption mode
        :raises DuplicateIV:      with ``strict_ivs``, the IV was used before
        """
        if filename is None:
            if self.lastinfo is None:
                raise InvalidParameter ("next: filename is mandatory for "
                                        "first object")
            filename, _dummy = self.lastinfo
        else:
            if isinstance (filename, str) is False:
                raise InvalidParameter ("next: filename must be a string, no %s"
                                        % type (filename))
        if counter is not None:
            if isinstance (counter, int) is False:
                raise InvalidParameter ("next: the supplied counter is of "
                                        "invalid type %s; please pass an "
                                        "integer instead" % type (counter))
            self.set_object_counter (counter)

        self.iv = self.iv_make ()
        if self.paramenc == "aes-gcm":
            self.enc = Cipher \
                            ( algorithms.AES (self.key)
                            , modes.GCM (self.iv)
                            , backend = default_backend ()) \
                            .encryptor ()
        elif self.paramenc == "passthrough":
            self.enc = PassthroughCipher ()
        else:
            raise InvalidParameter ("next: parameter version %d not known"
                                    % self.paramversion)
        hdrdum = hdr_make_dummy (filename)
        self.lastinfo = (filename, hdrdum)

        self.check_duplicate_iv (self.iv)

        super().next (self.password, self.paramversion, self.nacl)

        # From here on ``self.cnt`` is the counter of the *following* object;
        # the IV of the current one stays available as ``self.iv``.
        self.set_object_counter (self.cnt + 1)
        return hdrdum


    def check_duplicate_iv (self, iv):
        """
        Add an IV (the 12 byte representation as in the header) to the list. With
        strict checking enabled, this will throw a ``DuplicateIV``. Depending on
        the context, this may indicate a serious error (IV reuse).

        IVs are only tracked in strict_ivs mode.
        """
        if self.strict_ivs is False:
            return

        if iv in self.used_ivs:
            raise DuplicateIV ("iv %s was reused" % iv_fmt (iv))
        # iv has not been used before; add to collection
        self.used_ivs.add (iv)


    def done (self, cmpdata):
        """
        Complete encryption of an object. After this has been called, attempts
        of encrypting further data will cause an error until ``.next()`` is
        invoked properly.

        :param cmpdata: the bytes found at the position of the header; they
                        must equal the placeholder returned by ``.next()``
        :type  cmpdata: bytes
        :returns: tuple of three values: the data returned by finalizing the
                  encryptor, the object header including all values including
                  the “late” ones e. g. the ciphertext size and the GCM tag,
                  and the list of IV fixed parts used by this context
        :raises InvalidParameter: ``cmpdata`` is not bytes
        :raises RuntimeError:     no object in progress or placeholder mismatch
        :raises InternalError:    the header could not be constructed
        """
        if isinstance (cmpdata, bytes) is False:
            raise InvalidParameter ("done: comparison input expected as bytes, "
                                    "not %s" % type (cmpdata))
        if self.lastinfo is None:
            raise RuntimeError ("done: encryption context not initialized")
        filename, hdrdum = self.lastinfo
        if cmpdata != hdrdum:
            raise RuntimeError ("done: bad sync of header for object %d: "
                                "preliminary data does not match; this likely "
                                "indicates a wrongly repositioned stream"
                                % self.cnt)
        data = self.enc.finalize ()
        self.stats ["out"] += len (data)
        self.ctsize += len (data)
        ok, hdr = hdr_from_params (self.version, self.paramversion, self.nacl,
                                   self.iv, self.ctsize, self.enc.tag)
        if ok is False:
            raise InternalError ("error constructing header: %r" % hdr)
        return data, hdr, self.fixed


    def process (self, buf):
        """
        Encrypt a chunk of plaintext with the active encryptor.

        At most as many bytes are consumed as still fit into the current
        object (``PDTCRYPT_MAX_OBJ_SIZE``). The number of bytes consumed
        **must** be checked downstream. If it is smaller than ``len (buf)``
        the maximum possible object size has been reached: the current
        context must be finalized and a new one established before the
        remainder, ``buf [consumed:]``, can be encrypted.

        :type buf: bytes
        :returns:  tuple of the size of the input consumed and the
                   ciphertext produced for it
        :raises InvalidParameter: ``buf`` is not bytes
        """
        if isinstance (buf, bytes) is False:
            raise InvalidParameter ("process: expected byte buffer, not %s"
                                    % type (buf))
        bsize = len (buf)
        newptsize = self.ptsize + bsize
        diff = newptsize - PDTCRYPT_MAX_OBJ_SIZE
        if diff > 0:
            # buffer would exceed the object size limit: take only the part
            # that still fits
            bsize -= diff
            newptsize = PDTCRYPT_MAX_OBJ_SIZE
        self.ptsize = newptsize
        data = super().process (buf [:bsize])
        self.ctsize += len (data)
        return bsize, data
| 1361 | |
| 1362 | |
class Decrypt (Crypto):
    """
    Decrypting context. For every object: ``.next()`` reads the parameters
    from the object header and sets up the decryptor, ``.process()``
    decrypts the payload and ``.done()`` finalizes the object.
    """

    tag = None # GCM tag, part of header
    last_iv = None # check consecutive ivs in strict mode
    hdr_ctsize = -1 # ciphertext size announced by the current header

    def __init__ (self, password=None, key=None, counter=None, fixedparts=None,
                  strict_ivs=True, insecure=False):
        """
        Sanitizing ctor for the decryption context. ``fixedparts`` specifies a
        list of IV fixed parts accepted during decryption. If a fixed part is
        encountered that is not in the list, decryption will fail.

        :param    password: mutually exclusive with ``key``
        :type     password: str
        :param         key: mutually exclusive with ``password``
        :type          key: bytes
        :type      counter: initial object counter the values
                            ``AES_GCM_IV_CNT_INFOFILE`` and
                            ``AES_GCM_IV_CNT_INDEX`` are unique in each backup set
                            and cannot be reused even with different fixed parts.
        :type   fixedparts: bytes list
        :type   strict_ivs: bool
        :param  strict_ivs: fail if IVs of decrypted objects are not linearly
                            increasing
        :type     insecure: bool
        :param    insecure: whether to process objects encrypted in
                            passthrough mode (*``paramversion`` < 1*)
        :raises InvalidParameter: an argument has the wrong type or value

        *Security considerations*: The ``strict_ivs`` setting protects against
        ciphertext reordering and injection attacks. For this to work it relies
        on a property of how the object counters are created during encryption.
        If multiple ``Encrypt`` handles have been used during encryption, this
        property is unlikely to apply as it would require manual management
        of counters across Encrypt handles. In these cases it may thus be
        necessary to disable the ``strict_ivs`` protection.
        """
        if         password is     None and key is     None \
                or password is not None and key is not None :
            raise InvalidParameter ("__init__: need either key or password")

        if key is not None:
            if isinstance (key, bytes) is False:
                raise InvalidParameter ("__init__: key must be provided as "
                                        "bytes, not %s" % type (key))
        else: # password, no key
            if isinstance (password, str) is False:
                raise InvalidParameter ("__init__: password must be a string, not %s"
                                        % type (password))
            if len (password) == 0:
                raise InvalidParameter ("__init__: supplied empty password but not "
                                        "permitted for PDT encrypted files")
        # fixed parts
        if fixedparts is not None:
            if isinstance (fixedparts, list) is False:
                raise InvalidParameter ("__init__: IV fixed parts must be "
                                        "supplied as list, not %s"
                                        % type (fixedparts))
            # Sorted *copy*: ``valid_fixed_part`` bisects this list, and the
            # list owned by the caller is not to be reordered behind its back.
            self.fixed = sorted (fixedparts)

        super().__init__ (password=password, key=key, counter=counter,
                          strict_ivs=strict_ivs, insecure=insecure)


    def valid_fixed_part (self, iv):
        """
        Check if the fixed part of ``iv`` is among the accepted fixed parts.
        Only meaningful if a list of fixed parts was passed to the ctor.
        """
        # check if fixed part is known
        fixed, _cnt = struct.unpack (FMT_I2N_IV, iv)
        # binary search; ``self.fixed`` was sorted by the ctor
        i = bisect.bisect_left (self.fixed, fixed)
        return i != len (self.fixed) and self.fixed [i] == fixed


    def reset_last_iv (self):
        """
        Force a new IV sequence start. The last IV counter will be set from the
        next IV encountered and the check for consecutive IVs will be suppressed.

        The intended use is backup volume boundaries or handling batches of
        objects encrypted with ``Encrypt`` handles initialized with different
        initial counter values.
        """
        self.last_iv = None


    def check_consecutive_iv (self, iv):
        """
        Check whether the counter part of the given IV is indeed the successor
        of the currently present counter. This should always be the case for
        the objects in a well formed PDT archive but should not be enforced
        when decrypting out-of-order.

        The comparison only takes place in ``strict_ivs`` mode and only if the
        fixed part equals that of the preceding IV. The given IV is recorded
        as the new reference in any case.

        :raises NonConsecutiveIV: the counter is not the successor
        """
        fixed, cnt = struct.unpack (FMT_I2N_IV, iv)
        if self.strict_ivs is True \
                and self.last_iv is not None \
                and self.last_iv [0] == fixed \
                and self.last_iv [1] + 1 != cnt:
            raise NonConsecutiveIV ("iv %s counter not successor of "
                                    "last object (expected %d, found %d)"
                                    % (iv_fmt (iv), self.last_iv [1] + 1, cnt))
        self.last_iv = (fixed, cnt)


    def next (self, hdr):
        """
        Start decrypting the next object. The PDTCRYPT header for the object
        can be given either as already parsed object or as bytes.

        :raises InvalidParameter:   ``hdr`` is neither bytes nor a dict
        :raises InvalidHeader:      a header field is missing or the announced
                                    ciphertext size exceeds the object limit
        :raises InvalidIVFixedPart: the fixed part of the IV is not accepted
        :raises NonConsecutiveIV:   see ``check_consecutive_iv``
        :raises FormatError:        unknown parameter version
        :raises InternalError:      parameter set refers to an unknown mode
        """
        if isinstance (hdr, bytes) is True:
            hdr = hdr_read (hdr)
        elif isinstance (hdr, dict) is False:
            # this won’t catch malformed specs though
            raise InvalidParameter ("next: wrong type of parameter hdr: "
                                    "expected bytes or spec, got %s"
                                    % type (hdr))
        try:
            paramversion = hdr ["paramversion"]
            nacl = hdr ["nacl"]
            iv = hdr ["iv"]
            tag = hdr ["tag"]
            ctsize = hdr ["ctsize"]
        except KeyError:
            raise InvalidHeader ("next: not a header %r" % hdr)

        if ctsize > PDTCRYPT_MAX_OBJ_SIZE:
            raise InvalidHeader ("next: ciphertext size %d exceeds maximum "
                                 "object size (%d)"
                                 % (ctsize, PDTCRYPT_MAX_OBJ_SIZE))

        self.hdr_ctsize = ctsize

        super().next (self.password, paramversion, nacl)
        if self.fixed is not None and self.valid_fixed_part (iv) is False:
            raise InvalidIVFixedPart ("iv %s has invalid fixed part"
                                      % iv_fmt (iv))

        self.check_consecutive_iv (iv)

        self.tag = tag
        defs = ENCRYPTION_PARAMETERS.get (paramversion, None)
        if defs is None:
            raise FormatError ("header contains unknown parameter version %d; "
                               "maybe the file was created by a more recent "
                               "version of Deltatar" % paramversion)
        enc = defs ["enc"]
        if enc == "aes-gcm":
            self.enc = Cipher \
                            ( algorithms.AES (self.key)
                            , modes.GCM (iv, tag=self.tag)
                            , backend = default_backend ()) \
                            . decryptor ()
        elif enc == "passthrough":
            self.enc = PassthroughCipher ()
        else:
            raise InternalError ("encryption parameter set %d refers to unknown "
                                 "mode %r" % (paramversion, enc))
        self.set_object_counter (self.cnt + 1)


    def done (self, tag=None):
        """
        Stop decryption of the current object and finalize it with the active
        context. This will throw an *InvalidGCMTag* exception to indicate that
        the authentication tag does not match the data. If the tag is correct,
        the rest of the plaintext is returned.

        :param tag: tag to finalize with instead of the one taken from the
                    header in ``.next()``
        :type  tag: bytes or None
        :raises InvalidParameter: ``tag`` is given but not bytes
        :raises InvalidGCMTag:    authentication failed
        """
        if tag is not None and isinstance (tag, bytes) is False:
            raise InvalidParameter ("done: wrong type of parameter "
                                    "tag: expected bytes, got %s"
                                    % type (tag))
        # the tag that actually takes part in the check, for the diagnostics
        checked = self.tag if tag is None else tag
        data = b""
        try:
            if tag is None:
                data = self.enc.finalize ()
            else:
                # Pass on the tag supplied by the caller; it used to be
                # validated and then dropped in favor of ``self.tag``.
                # NOTE(review): ``.next()`` already hands the header tag to
                # the GCM mode -- confirm that the backend accepts another
                # tag at this point.
                data = self.enc.finalize_with_tag (tag)
        except cryptography.exceptions.InvalidTag:
            raise InvalidGCMTag ("done: tag mismatch of object %d: %s "
                                  "rejected by finalize ()"
                                  % (self.cnt, binascii.hexlify (checked)))
        self.ptsize += len (data)
        self.stats ["out"] += len (data)

        # NOTE: ``assert`` statements are skipped when running under
        # ``python -O``; then this consistency check does not take place.
        assert self.ctsize == self.ptsize == self.hdr_ctsize

        return data


    def process (self, buf):
        """
        Decrypt the bytes object *buf* with the active decryptor.

        :returns: the plaintext obtained for ``buf``
        :raises InvalidParameter:  ``buf`` is not bytes
        :raises CiphertextTooLong: more data was passed in for the object than
                                   its header announced
        """
        if isinstance (buf, bytes) is False:
            raise InvalidParameter ("process: expected byte buffer, not %s"
                                    % type (buf))
        self.ctsize += len (buf)
        if self.ctsize > self.hdr_ctsize:
            raise CiphertextTooLong ("process: object length exceeded: got "
                                     "%d B but header specfiies %d B"
                                     % (self.ctsize, self.hdr_ctsize))

        data = super().process (buf)
        self.ptsize += len (data)
        return data
| 1568 | |
| 1569 | |
| 1570 | ############################################################################### |
| 1571 | ## testing helpers |
| 1572 | ############################################################################### |
| 1573 | |
def _patch_global (glob, vow, n=None):
    """
    Adapt upper file counter bound for testing IV logic. Completely unsafe.

    Overwrites the module level variable named ``glob``.

    :param glob: name of the global to replace
    :type  glob: str
    :param vow:  must be the exact acknowledgement sentence checked below
    :param n:    new value; if ``None``, the value of the global named
                 ``glob + "_DEFAULT"`` is installed instead
    :returns:    the value the global had before the call
    :raises AssertionError: the vow was not given verbatim
    :raises KeyError:       ``glob`` or the ``_DEFAULT`` global does not exist
    """
    # Raised explicitly instead of using ``assert``: the guard must not
    # vanish when the interpreter runs with ``-O``.
    if vow != "I am fully aware that this will void my warranty.":
        raise AssertionError ("refusing to patch %r without the vow" % glob)
    r = globals () [glob]
    if n is None:
        n = globals () [glob + "_DEFAULT"]
    globals () [glob] = n
    return r
| 1584 | |
# Test hooks built on ``_patch_global`` with the name of the global already
# bound. Callers pass the vow string and, optionally, the new value; leaving
# the value out installs the global named ``<name>_DEFAULT``. Each call
# returns the value that was in effect before.
_testing_set_AES_GCM_IV_CNT_MAX = \
        partial (_patch_global, "AES_GCM_IV_CNT_MAX")

_testing_set_PDTCRYPT_MAX_OBJ_SIZE = \
        partial (_patch_global, "PDTCRYPT_MAX_OBJ_SIZE")
| 1590 | |
def open2_dump_file(fname, dir_fd, force=False):
    """
    Create a write-only output file relative to ``dir_fd`` and return its
    file descriptor.

    The file is created readable and writable for the owner only. Unless
    ``force`` is ``True``, an existing file is never touched: the open is
    exclusive and a clash is reported through ``noise ()`` before a
    ``RuntimeError`` is raised. With ``force`` set to ``True`` an existing
    file is truncated instead.

    :param fname:  name of the file to create
    :param dir_fd: directory descriptor handed to ``os.open``
    :param force:  overwrite an existing file; only the value ``True`` counts
    :returns:      the new file descriptor
    :raises RuntimeError: the file exists and ``force`` is not ``True``
    """
    disposition = os.O_TRUNC if force is True else os.O_EXCL
    flags = os.O_CREAT | os.O_WRONLY | disposition
    owner_rw = stat.S_IRUSR | stat.S_IWUSR

    try:
        fd = os.open(fname, flags, owner_rw, dir_fd=dir_fd)
    except FileExistsError:
        noise("PDT: refusing to overwrite existing file %s" % fname)
        noise("")
        raise RuntimeError("destination file %s already exists" % fname)

    if PDTCRYPT_VERBOSE is True:
        noise("PDT: new output file %s (fd=%d)" % (fname, fd))
    return fd
| 1611 | |
| 1612 | ############################################################################### |
| 1613 | ## freestanding invocation |
| 1614 | ############################################################################### |
| 1615 | |
# Identifiers of the subcommands understood by the command line interface.
PDTCRYPT_SUB_PROCESS = 0
PDTCRYPT_SUB_SCRYPT = 1
PDTCRYPT_SUB_SCAN = 2
PDTCRYPT_SUB_IVCHECK = 3

# Maps the subcommand as spelled on the command line to its identifier;
# parse_argv looks up the first argument here.
PDTCRYPT_SUB = \
        { "process" : PDTCRYPT_SUB_PROCESS
        , "scrypt" : PDTCRYPT_SUB_SCRYPT
        , "scan" : PDTCRYPT_SUB_SCAN
        , "ivcheck" : PDTCRYPT_SUB_IVCHECK }

# Bit flags combined into the ``mode`` argument of depdtcrypt.
PDTCRYPT_DECRYPT = 1 << 0 # decrypt archive with password
PDTCRYPT_SPLIT = 1 << 1 # split archive into individual objects
PDTCRYPT_HASH = 1 << 2 # output scrypt hash for file and given password

# File name templates for objects written in split mode and by the scanner;
# both are formatted with the running object number.
PDTCRYPT_SPLITNAME = "pdtcrypt-object-%d.bin"
PDTCRYPT_RESCUENAME = "pdtcrypt-rescue-object-%0.5d.bin"

# Runtime settings; parse_argv overrides VERBOSE, STRICTIVS and OVERWRITE
# according to the command line.
PDTCRYPT_VERBOSE = False
PDTCRYPT_STRICTIVS = False
PDTCRYPT_OVERWRITE = False
PDTCRYPT_BLOCKSIZE = 1 << 12 # upper bound for a single read in depdtcrypt
# Stream kinds accepted by deptdcrypt_mk_stream.
PDTCRYPT_SINK = 0
PDTCRYPT_SOURCE = 1
SELF = None # basename of argv [0]; assigned by parse_argv

# Values mode_scrypt falls back to when no input file is given.
PDTCRYPT_DEFAULT_VER = 1
PDTCRYPT_DEFAULT_PVER = 1

# scrypt hashing output control
PDTCRYPT_SCRYPT_INTRANATOR = 0
PDTCRYPT_SCRYPT_PARAMETERS = 1
PDTCRYPT_SCRYPT_DEFAULT = PDTCRYPT_SCRYPT_INTRANATOR

# Maps the argument of ``--format`` in scrypt mode to the output scheme.
PDTCRYPT_SCRYPT_FORMAT = \
        { "i2n" : PDTCRYPT_SCRYPT_INTRANATOR
        , "params" : PDTCRYPT_SCRYPT_PARAMETERS }

PDTCRYPT_TT_COLUMNS = 80 # assume standard terminal
| 1655 | |
class PDTDecryptionError (Exception):
    """
    Processing a PDTCRYPT stream on the command line failed: a header could
    not be built or parsed, or the byte accounting at the end of the input
    does not add up.
    """
| 1658 | |
class PDTSplitError (Exception):
    """
    Splitting failed: the output file for an object could not be created in
    the destination directory.
    """
| 1661 | |
| 1662 | |
def noise (*a, **b):
    """
    Print to stderr. Takes the arguments of ``print`` with the exception of
    *file*, which is always ``sys.stderr``.
    """
    print (*a, file=sys.stderr, **b)
| 1665 | |
| 1666 | |
class PassthroughDecryptor (object):
    """
    Drop-in for the decryptor that leaves the data untouched. The header
    built for the current object is handed out in front of the first chunk
    passed through ``process``; all other data is returned as is.
    """

    curhdr = None # header waiting to be written with the next data

    def __init__ (self):
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT: no encryption; data passthrough")

    def next (self, hdr):
        """
        Start a new object: build the header from *hdr* with ``hdr_make`` and
        keep it for the next call to ``process``. Raises
        ``PDTDecryptionError`` if ``hdr_make`` signals failure.
        """
        ok, built = hdr_make (hdr)
        if ok is False:
            raise PDTDecryptionError ("bad header %r" % hdr)
        self.curhdr = built

    def done (self):
        """Return the header still pending, or empty bytes if none is."""
        pending = self.curhdr
        return b"" if pending is None else pending

    def process (self, d):
        """Return *d*, preceded by the pending header if there is one."""
        pending = self.curhdr
        if pending is None:
            return d
        joined = pending + d
        self.curhdr = None # the header goes out only once
        return joined
| 1691 | |
| 1692 | |
def check_ivs (ifs):
    """
    Walk the objects in the given reader, validating uniqueness and
    consecutiveness of the IVs in the object headers.

    As the IVs are metadata this does not require decryption.

    *ifs* is a binary stream positioned at the first header. A change of the
    fixed IV part between two objects is only reported on stderr. Returns the
    number of objects traversed.

    Raises ``NonConsecutiveIV`` if the counter of an IV is not the successor
    of the preceding object's and ``DuplicateIV`` if an IV occurs twice.
    """
    objs = 0
    seen = set () # every IV encountered so far
    last = None # (fixed part, counter) of the preceding object

    while True:
        try:
            hdr = hdr_read_stream (ifs)
        except EndOfFile:
            break # done

        objs += 1
        cur = hdr ["iv"]

        fixed, cnt = struct.unpack (FMT_I2N_IV, cur)

        if PDTCRYPT_VERBOSE is True:
            noise ("PDT: obj %d, iv %s" % (objs, iv_fmt (cur)))

        if last is not None:
            if fixed != last [0]:
                noise ("PDT: obj %d, fixed part changed last: %s → this: %s"
                       % (objs,
                          binascii.hexlify (last [0]),
                          binascii.hexlify (fixed)))
            if cnt != last [1] + 1:
                raise NonConsecutiveIV ("iv %s counter not successor of "
                                        "last object (expected %d, found %d)"
                                        % (iv_fmt (cur), last [1] + 1, cnt))

        if cur in seen:
            raise DuplicateIV ("iv %s was reused" % iv_fmt (cur))

        seen.add (cur)
        last = (fixed, cnt)

        # skip the payload to arrive at the next header
        ifs.read (hdr ["ctsize"])

    return objs
| 1738 | |
| 1739 | |
def depdtcrypt (mode, secret, ins, outs):
    """
    Remove PDTCRYPT layer from all objects encrypted with the secret. Used on a
    Deltatar backup this will yield a (possibly Gzip compressed) tarball.

    *mode* is a combination of ``PDTCRYPT_DECRYPT`` and ``PDTCRYPT_SPLIT``.
    Without the former, headers and ciphertext are passed through unchanged.
    *secret* is indexed as ``(kind, value)`` and only consulted when
    decrypting. *ins* is the binary input stream. *outs* is the binary output
    stream or, in split mode, a descriptor of the directory receiving one
    file per object.

    Returns the tuple ``(total_read, total_obj, total_ct, total_pt)`` once the
    input is exhausted at an object boundary.

    Raises ``DecryptionError`` if an object fails validation or output cannot
    be written, ``PDTDecryptionError`` on an unparsable header or inconsistent
    byte accounting, ``PDTSplitError`` if an output file cannot be created in
    split mode and ``EndOfFile`` if the input ends inside an object.
    """
    ctleft = -1 # length of ciphertext to consume
    ctcurrent = 0 # total ciphertext of current object
    total_obj = 0 # total number of objects read
    total_pt = 0 # total plaintext bytes
    total_ct = 0 # total ciphertext bytes
    total_read = 0 # total bytes read
    outfile = None # Python file object for output

    if mode & PDTCRYPT_DECRYPT: # decryptor
        ks = secret [0]
        if ks == PDTCRYPT_SECRET_PW:
            decr = Decrypt (password=secret [1], strict_ivs=PDTCRYPT_STRICTIVS)
        elif ks == PDTCRYPT_SECRET_KEY:
            key = secret [1]
            decr = Decrypt (key=key, strict_ivs=PDTCRYPT_STRICTIVS)
        else:
            raise InternalError ("‘%d’ does not specify a valid kind of secret"
                                 % ks)
    else:
        decr = PassthroughDecryptor ()

    def nextout (_):
        """Dummy for non-split mode: output file does not vary."""
        return outs

    if mode & PDTCRYPT_SPLIT:
        def nextout (outfile):
            """
            We were passed an fd as outs for accessing the destination
            directory where extracted archive components are supposed
            to end up in.
            """

            if outfile is None:
                if PDTCRYPT_VERBOSE is True:
                    noise ("PDT: no output file to close at this point")
            else:
                if PDTCRYPT_VERBOSE is True:
                    noise ("PDT: release output file %r" % outfile)
                # cleanup happens automatically by the GC; the next
                # line will error out on account of an invalid fd
                #outfile.close ()

            assert total_obj > 0
            fname = PDTCRYPT_SPLITNAME % total_obj
            try:
                outfd = open2_dump_file (fname, outs, force=PDTCRYPT_OVERWRITE)
            except RuntimeError as exn:
                raise PDTSplitError (exn) from exn
            return os.fdopen (outfd, "wb", closefd=True)


    def tell (s):
        """ESPIPE is normal on non-seekable stdio stream."""
        try:
            return s.tell ()
        except OSError as exn:
            if exn.errno == errno.ESPIPE:
                return -1
            # anything else is a genuine error; falling through would return
            # None and break the ``%d`` formatting at the call sites
            raise

    def out (pt, outfile):
        """Write plaintext *pt* to *outfile*, accounting for its size."""
        npt = len (pt)
        nonlocal total_pt
        total_pt += npt
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT:\t· decrypt plaintext %d B" % (npt))
        try:
            nn = outfile.write (pt)
        except OSError as exn: # probably ENOSPC
            raise DecryptionError ("error (%s)" % exn) from exn
        if nn != npt:
            raise DecryptionError ("write aborted after %d of %d B" % (nn, npt))

    while True:
        if ctleft <= 0:
            # current object completed; in a valid archive this marks either
            # the start of a new header or the end of the input
            if ctleft == 0: # current object requires finalization
                if PDTCRYPT_VERBOSE is True:
                    noise ("PDT: %d finalize" % tell (ins))
                try:
                    pt = decr.done ()
                except InvalidGCMTag as exn:
                    # report the ciphertext size of the object; ``pt`` still
                    # refers to the last chunk processed at this point
                    raise DecryptionError ("error finalizing object %d (%d B): "
                                           "%r" % (total_obj, ctcurrent, exn)) \
                        from exn
                out (pt, outfile)
                if PDTCRYPT_VERBOSE is True:
                    noise ("PDT:\t· object validated")

            if PDTCRYPT_VERBOSE is True:
                noise ("PDT: %d hdr" % tell (ins))
            try:
                hdr = hdr_read_stream (ins)
                total_read += PDTCRYPT_HDR_SIZE
            except EndOfFile as exn:
                total_read += exn.remainder
                if total_ct + total_obj * PDTCRYPT_HDR_SIZE != total_read:
                    raise PDTDecryptionError ("ciphertext processed (%d B) plus "
                                              "overhead (%d × %d B) does not match "
                                              "the number of bytes read (%d )"
                                              % (total_ct, total_obj, PDTCRYPT_HDR_SIZE,
                                                 total_read))
                # the single good exit
                return total_read, total_obj, total_ct, total_pt
            except InvalidHeader as exn:
                raise PDTDecryptionError ("invalid header at position %d in %r "
                                          "(%s)" % (tell (ins), ins, exn)) \
                    from exn
            if PDTCRYPT_VERBOSE is True:
                pretty = hdr_fmt_pretty (hdr)
                noise ("\n".join ("PDT:\t· " + e for e in pretty.splitlines ()))
            ctcurrent = ctleft = hdr ["ctsize"]

            decr.next (hdr)

            total_obj += 1 # used in file counter with split mode

            # finalization complete or skipped in case of first object in
            # stream; create a new output file if necessary
            outfile = nextout (outfile)

            if PDTCRYPT_VERBOSE is True:
                noise ("PDT: %d decrypt obj no. %d, %d B"
                       % (tell (ins), total_obj, ctleft))

        # always allocate a new buffer since python-cryptography doesn’t allow
        # passing a bytearray :/
        nexpect = min (ctleft, PDTCRYPT_BLOCKSIZE)
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT:\t· [%d] %d%% done, read block (%d B of %d B remaining)"
                   % (tell (ins),
                      100 - ctleft * 100 / (ctcurrent if ctcurrent > 0 else 1),
                      nexpect, ctleft))
        ct = ins.read (nexpect)
        nct = len (ct)
        if nct < nexpect:
            off = tell (ins)
            raise EndOfFile (nct,
                             "hit EOF after %d of %d B in block [%d:%d); "
                             "%d B ciphertext remaining for object no %d"
                             % (nct, nexpect, off, off + nexpect, ctleft,
                                total_obj))
        ctleft -= nct
        total_ct += nct
        total_read += nct

        if PDTCRYPT_VERBOSE is True:
            noise ("PDT:\t· decrypt ciphertext %d B" % (nct))
        pt = decr.process (ct)
        out (pt, outfile)
| 1896 | |
| 1897 | |
def deptdcrypt_mk_stream (kind, path):
    """
    Create stream from file or stdio descriptor.

    *kind* is ``PDTCRYPT_SINK`` for an output or ``PDTCRYPT_SOURCE`` for an
    input stream. A *path* of “-” selects the binary stdout / stdin stream;
    any other value is opened as a file. Raises ``ValueError`` for an unknown
    *kind*.
    """
    if kind == PDTCRYPT_SINK:
        if path != "-":
            if PDTCRYPT_VERBOSE is True:
                noise ("PDT: sink: file %s" % path)
            return io.FileIO (path, "w")
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT: sink: stdout")
        return sys.stdout.buffer

    if kind == PDTCRYPT_SOURCE:
        if path != "-":
            if PDTCRYPT_VERBOSE is True:
                noise ("PDT: source: file %s" % path)
            return io.FileIO (path, "r")
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT: source: stdin")
        return sys.stdin.buffer

    raise ValueError ("bogus stream “%s” / %s" % (kind, path))
| 1916 | |
| 1917 | |
def mode_depdtcrypt (mode, secret, ins, outs):
    """
    Run ``depdtcrypt`` for the command line: report failures on stderr instead
    of letting them escape and, in verbose mode, print the statistics.

    Returns 0 on success and 1 if decryption, processing of the stream, or
    splitting failed.
    """
    try:
        total_read, total_obj, total_ct, total_pt = \
            depdtcrypt (mode, secret, ins, outs)
    except DecryptionError as exn:
        noise ("PDT: Decryption failed:")
        noise ("PDT:")
        noise ("PDT: “%s”" % exn)
        noise ("PDT:")
        noise ("PDT: Did you specify the correct key / password?")
        noise ("")
        return 1
    except PDTDecryptionError as exn:
        # bad headers and inconsistent byte counts are raised as
        # PDTDecryptionError; they are not related to the secret, hence no
        # hint regarding the password
        noise ("PDT: Decryption failed:")
        noise ("PDT:")
        noise ("PDT: “%s”" % exn)
        noise ("")
        return 1
    except PDTSplitError as exn:
        noise ("PDT: Split operation failed:")
        noise ("PDT:")
        noise ("PDT: “%s”" % exn)
        noise ("PDT:")
        noise ("PDT: Hint: target directory should be empty.")
        noise ("")
        return 1

    if PDTCRYPT_VERBOSE is True:
        noise ("PDT: decryption successful" )
        noise ("PDT: %.10d bytes read" % total_read)
        noise ("PDT: %.10d objects decrypted" % total_obj )
        noise ("PDT: %.10d bytes ciphertext" % total_ct )
        noise ("PDT: %.10d bytes plaintext" % total_pt )
        noise ("" )

    return 0
| 1948 | |
| 1949 | |
def mode_scrypt (pw, ins=None, nacl=None, fmt=PDTCRYPT_SCRYPT_INTRANATOR):
    """
    Print the scrypt hash for the password *pw* as JSON on stdout.

    With an input stream *ins*, hash, salt and versions are whatever
    ``scrypt_hashsource (pw, ins)`` returns. Without one, *nacl* must hold
    the salt in hexadecimal notation and the default versions apply. *fmt*
    selects the output scheme; an unknown scheme raises ``RuntimeError``.
    """
    if ins is None:
        hsh = None
        paramversion = PDTCRYPT_DEFAULT_PVER
        nacl = binascii.unhexlify (nacl)
        version = PDTCRYPT_DEFAULT_VER
    else:
        hsh, nacl, version, paramversion = scrypt_hashsource (pw, ins)
    defs = ENCRYPTION_PARAMETERS.get (paramversion, None)

    _kdfname, params = defs ["kdf"]
    if hsh is None:
        # no hash from the input; derive it from password and salt
        derive = kdf_by_version (None, defs)
        hsh, _void = derive (pw, nacl)

    import json

    if fmt == PDTCRYPT_SCRYPT_INTRANATOR:
        record = { "salt" : base64.b64encode (nacl).decode ()
                 , "key" : base64.b64encode (hsh).decode ()
                 , "paramversion" : paramversion }
    elif fmt == PDTCRYPT_SCRYPT_PARAMETERS:
        scrypt_params = { name : params [name]
                          for name in ("N", "r", "p", "dkLen") }
        record = { "salt" : binascii.hexlify (nacl).decode ()
                 , "key" : binascii.hexlify (hsh).decode ()
                 , "version" : version
                 , "scrypt_params" : scrypt_params }
    else:
        raise RuntimeError ("bad scrypt output scheme %r" % fmt)

    print (json.dumps (record))
| 1984 | |
| 1985 | |
def noise_output_candidates (cands, indent=8, cols=PDTCRYPT_TT_COLUMNS):
    """
    Print the offsets in *cands* on stderr as a comma separated list, breaking
    lines before they grow past the terminal width *cols*.

    *indent* counts from column zero; from a width of five columns upwards,
    the “PDT: ” marker takes the place of the first five columns of
    indentation. Raises ``ValueError`` if the indentation leaves no room for
    output.
    """
    limit = cols - 1
    ncands = len (cands)
    if indent < 5:
        prefix = " " * indent
    else:
        prefix = "PDT: " + " " * (indent - 5)
    comma = ","

    if indent >= limit:
        raise ValueError ("the requested indentation exceeds the line "
                          "width by %d" % (indent - limit))

    if ncands == 0:
        return # nothing to print

    buf = prefix # line under construction
    col = indent # column the line extends to
    first = True # no separator in front of the very first item

    for offset in cands:
        digits = "%d" % offset
        width = len (digits)
        if not first:
            buf += comma
            col += len (comma)

        col += width
        if col > limit: # item does not fit: flush and start over
            noise (buf)
            buf = prefix
            col = indent + width
        elif first:
            first = False
        else: # pad between separator and item
            buf += ' '
            col += 1

        buf += digits

    if col != indent:
        noise (buf)
| 2028 | |
| 2029 | |
# Kinds of interval bounds. The values take part in sorting: at the same
# offset an END (0) sorts before a START (1), so half-open slices that merely
# touch are not considered overlapping.
SLICE_START = 1
SLICE_END = 0

def find_overlaps (slices):
    """
    Find overlapping slices: iterate open/close points of intervals, tracking
    the ones open at any time.

    *slices* is a sequence of half-open ``(start, end)`` pairs. Empty or
    reversed slices (``end <= start``) cover no bytes and are ignored.
    Returns the list of slices overlapping at least one other slice, in the
    order of the input.
    """
    bounds = []
    for i, s in enumerate (slices):
        if s [1] <= s [0]:
            # covers nothing; its END would sort before its START and upset
            # the bookkeeping below
            continue
        bounds.append ((s [0], SLICE_START, i))
        bounds.append ((s [1], SLICE_END, i))
    bounds.sort ()

    inside = set () # of indices into slices: the ones currently open
    ovrlp = set () # of indices into slices: the ones found to overlap

    for _pos, kind, i in bounds:
        if kind == SLICE_START:
            inside.add (i)
        else:
            if len (inside) > 1: # closing one that overlapped
                ovrlp |= inside
            inside.remove (i)

    return [ slices [i] for i in sorted (ovrlp) ]
| 2057 | |
| 2058 | |
def mode_ivcheck (ifd):
    """
    Run ``check_ivs`` on the input stream *ifd* and report the outcome on
    stderr.

    Returns 1 if the IVs are inconsistent or any other error occurred;
    nothing is returned explicitly on success.
    """
    def report (headline, exn, blank):
        """Print the description of a failure."""
        noise (headline)
        noise ("PDT:")
        noise ("PDT: “%s”" % exn)
        noise ("PDT:")
        if blank is True:
            noise ("")

    try:
        nobjs = check_ivs (ifd)
    except (NonConsecutiveIV, DuplicateIV) as exn:
        report ("PDT: Detected inconsistent initialization vectors", exn, True)
        return 1
    except Exception as exn:
        report ("PDT: Hit an error unrelated to checking IVs", exn, False)
        return 1
    else:
        noise ("PDT: Successfully traversed %d encrypted objects in input."
               % nobjs)
        noise ("PDT:")
        noise ("PDT: All IVs consecutive and unique.")
| 2081 | |
| 2082 | |
def mode_scan (secret, fname, outs=None, nacl=None):
    """
    Dissect a binary file, looking for PDTCRYPT headers and objects.

    If *outs* is supplied, recoverable data will be dumped into the specified
    directory.

    *secret* is handed to ``try_decrypt`` for every header candidate that is
    not outright junk. *fname* names the file to scan. *outs*, if given, is
    used as directory descriptor for the rescued objects. *nacl* is accepted
    but not used.

    The verdict for each candidate as well as the unparsable candidates and
    overlapping objects are reported on stderr. Returns -1 if the input holds
    no header candidates; nothing is returned explicitly otherwise.
    """
    try:
        ifd = os.open (fname, os.O_RDONLY)
    except FileNotFoundError:
        noise ("PDT: failed to open %s readonly" % fname)
        noise ("")
        usage (err=True)

    junk, slices = [], []
    try: # the input descriptor is released on every path out of here
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT: scan for potential sync points")
        cands = locate_hdr_candidates (ifd)
        if len (cands) == 0:
            noise ("PDT: scan complete: input does not contain potential PDT "
                   "headers; giving up.")
            return -1
        if PDTCRYPT_VERBOSE is True:
            noise ("PDT: scan complete: found %d candidates:" % len (cands))
            noise_output_candidates (cands)

        for nobj, cand in enumerate (cands, 1):
            vdt, hdr = inspect_hdr (ifd, cand)

            vdts = verdict_fmt (vdt)

            if vdt == HDR_CAND_JUNK:
                noise ("PDT: obj %d: %s object: bad header, skipping"
                       % (nobj, vdts))
                junk.append (cand)
            else:
                off0 = cand + PDTCRYPT_HDR_SIZE
                if PDTCRYPT_VERBOSE is True:
                    noise ("PDT: obj %d: read payload @%d" % (nobj, off0))
                    pretty = hdr_fmt_pretty (hdr)
                    noise ("\n".join ("PDT:\t· " + e
                                      for e in pretty.splitlines ()))

                ofd = -1
                if outs is not None:
                    ofname = PDTCRYPT_RESCUENAME % nobj
                    ofd = open2_dump_file (ofname, outs, force=PDTCRYPT_OVERWRITE)

                ctsize = hdr ["ctsize"]
                try:
                    l = try_decrypt (ifd, off0, hdr, secret, ofd=ofd)
                    # the payload is fine if all of it could be processed
                    ok = l == ctsize
                    slices.append ((off0, off0 + l))
                finally:
                    if ofd != -1:
                        os.close (ofd)
                if vdt == HDR_CAND_GOOD and ok is True:
                    noise ("PDT: %d → ✓ %s object %d–%d"
                           % (cand, vdts, off0, off0 + ctsize))
                elif vdt == HDR_CAND_FISHY and ok is True:
                    noise ("PDT: %d → × %s object %d–%d, corrupt header"
                           % (cand, vdts, off0, off0 + ctsize))
                elif vdt == HDR_CAND_GOOD and ok is False:
                    noise ("PDT: %d → × %s object %d–%d, problematic payload"
                           % (cand, vdts, off0, off0 + ctsize))
                elif vdt == HDR_CAND_FISHY and ok is False:
                    noise ("PDT: %d → × %s object %d–%d, corrupt header, problematic "
                           "ciphertext" % (cand, vdts, off0, off0 + ctsize))
                else:
                    raise Unreachable
    finally:
        os.close (ifd)

    if len (junk) == 0:
        noise ("PDT: all headers ok")
    else:
        noise ("PDT: %d candidates not parseable as headers:" % len (junk))
        noise_output_candidates (junk)

    overlap = find_overlaps (slices)
    if len (overlap) > 0:
        noise ("PDT: %d objects overlapping others" % len (overlap))
        for slice in overlap:
            noise ("PDT: × %d→%d" % (slice [0], slice [1]))
| 2173 | |
| 2174 | |
def usage (err=False):
    """
    Print the command line help and exit.

    With *err* set to ``True`` the text goes to stderr and the exit status is
    42; otherwise it goes to stdout and the exit status is 0. Does not
    return: the function always ends in ``sys.exit``.
    """
    out = print
    if err is True:
        out = noise
    # SELF is only assigned by parse_argv; fall back to argv so that the help
    # can be shown regardless
    prog = SELF if SELF is not None else os.path.basename (sys.argv [0])
    indent = ' ' * len (prog)
    out ("usage: %s SUBCOMMAND { --help" % prog)
    out (" %s | [ -v ] { -p PASSWORD | -k KEY }" % indent)
    out (" %s [ { -i | --in } { - | SOURCE } ]" % indent)
    out (" %s [ { -n | --nacl } { SALT } ]" % indent)
    out (" %s [ { -o | --out } { - | DESTINATION } ]" % indent)
    out (" %s [ -D | --no-decrypt ] [ -S | --split ]" % indent)
    out (" %s [ -f | --format ]" % indent)
    out ("")
    out ("\twhere")
    out ("\t\tSUBCOMMAND main mode: { process | scrypt | scan | ivcheck }")
    out ("\t\t where:")
    out ("\t\t process: extract objects from PDT archive")
    out ("\t\t scrypt: calculate hash from password and first object")
    out ("\t\t scan: scan input for PDTCRYPT headers")
    out ("\t\t ivcheck: check whether IVs are consecutive")
    out ("\t\t-p PASSWORD password to derive the encryption key from")
    out ("\t\t-k KEY encryption key as 16 bytes in hexadecimal notation")
    out ("\t\t-s enforce strict handling of initialization vectors")
    out ("\t\t-i SOURCE file name to read from")
    out ("\t\t-o DESTINATION file to write output to")
    out ("\t\t-n SALT provide salt for scrypt mode in hex encoding")
    out ("\t\t-v print extra info")
    out ("\t\t-S split into files at object boundaries; this")
    out ("\t\t requires DESTINATION to refer to directory")
    out ("\t\t-D PDT header and ciphertext passthrough")
    # the values must match the keys of PDTCRYPT_SCRYPT_FORMAT
    out ("\t\t-f FORMAT scrypt mode: format of hash output (“i2n” or “params”)")
    out ("\t\t-f process / scan mode: overwrite existing output files")
    out ("")
    out ("\tinstead of filenames, “-” may be used to specify stdin / stdout")
    out ("")
    sys.exit (42 if err is True else 0)
| 2210 | |
| 2211 | |
def bail (msg):
    """
    Print *msg* followed by an empty line on stderr, then leave through
    ``usage`` in error mode. Control is not expected to come back from there.
    """
    for line in (msg, ""):
        noise (line)
    usage (err=True)
    raise Unreachable
| 2217 | |
| 2218 | |
def parse_argv (argv):
    """
    Parse the command line *argv* (program name first) and prepare the
    requested operation.

    Options are checked against the subcommand given as first argument. If
    neither password nor key was passed, the environment variables
    ``PDTCRYPT_PASSWORD`` and ``PDTCRYPT_KEY`` are consulted, in that order.
    The module-level settings ``SELF``, ``PDTCRYPT_VERBOSE``,
    ``PDTCRYPT_STRICTIVS`` and ``PDTCRYPT_OVERWRITE`` are updated on the way.

    Returns the pair ``(True, runner)`` where *runner* is a callable without
    arguments that performs the operation. Invalid input ends in ``bail``,
    which prints the usage and exits.
    """
    global PDTCRYPT_OVERWRITE
    global PDTCRYPT_STRICTIVS
    global PDTCRYPT_VERBOSE
    global SELF
    mode = PDTCRYPT_DECRYPT
    secret = None
    insspec = None
    outsspec = None
    outs = None
    nacl = None
    scrypt_format = PDTCRYPT_SCRYPT_DEFAULT

    argvi = iter (argv)
    SELF = os.path.basename (next (argvi))

    try:
        rawsubcmd = next (argvi)
        subcommand = PDTCRYPT_SUB [rawsubcmd]
    except StopIteration:
        bail ("ERROR: subcommand required")
    except KeyError:
        bail ("ERROR: invalid subcommand “%s” specified" % rawsubcmd)

    def checked_arg ():
        """Fetch the value belonging to the current option."""
        try:
            return next (argvi)
        except StopIteration:
            bail ("ERROR: argument list incomplete")

    def checked_secret (origin, s):
        """
        Accept the secret *s* unless one was supplied already. *origin* names
        the option or variable it came from; the secret itself must never end
        up in an error message.
        """
        nonlocal secret
        if secret is None:
            secret = s
        else:
            bail ("ERROR: encountered “%s” but secret already given" % origin)

    for arg in argvi:
        if arg in [ "-h", "--help" ]:
            usage ()
            raise Unreachable
        elif arg in [ "-v", "--verbose", "--wtf" ]:
            PDTCRYPT_VERBOSE = True
        elif arg in [ "-i", "--in", "--source" ]:
            insspec = checked_arg ()
            if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypt from %s" % insspec)
        elif arg in [ "-p", "--password" ]:
            pw = checked_arg ()
            checked_secret (arg, make_secret (password=pw))
            if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypting with password")
        else:
            if subcommand == PDTCRYPT_SUB_PROCESS:
                if arg in [ "-s", "--strict-ivs" ]:
                    PDTCRYPT_STRICTIVS = True
                elif arg in [ "-o", "--out", "--dest", "--sink" ]:
                    outsspec = checked_arg ()
                    if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypt to %s" % outsspec)
                elif arg in [ "-f", "--force" ]:
                    PDTCRYPT_OVERWRITE = True
                    if PDTCRYPT_VERBOSE is True: noise ("PDT: overwrite existing files")
                elif arg in [ "-S", "--split" ]:
                    mode |= PDTCRYPT_SPLIT
                    if PDTCRYPT_VERBOSE is True: noise ("PDT: split files")
                elif arg in [ "-D", "--no-decrypt" ]:
                    mode &= ~PDTCRYPT_DECRYPT
                    if PDTCRYPT_VERBOSE is True: noise ("PDT: not decrypting")
                elif arg in [ "-k", "--key" ]:
                    key = checked_arg ()
                    checked_secret (arg, make_secret (key=key))
                    if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypting with key")
                else:
                    bail ("ERROR: unexpected positional argument “%s”" % arg)
            elif subcommand == PDTCRYPT_SUB_SCRYPT:
                if arg in [ "-n", "--nacl", "--salt" ]:
                    nacl = checked_arg ()
                    if PDTCRYPT_VERBOSE is True: noise ("PDT: salt key with %s" % nacl)
                elif arg in [ "-f", "--format" ]:
                    arg = checked_arg ()
                    try:
                        scrypt_format = PDTCRYPT_SCRYPT_FORMAT [arg]
                    except KeyError:
                        bail ("ERROR: invalid scrypt output format %s" % arg)
                    if PDTCRYPT_VERBOSE is True:
                        noise ("PDT: scrypt output format “%s”" % scrypt_format)
                else:
                    bail ("ERROR: unexpected positional argument “%s”" % arg)
            elif subcommand == PDTCRYPT_SUB_SCAN:
                if arg in [ "-o", "--out", "--dest", "--sink" ]:
                    outsspec = checked_arg ()
                    if PDTCRYPT_VERBOSE is True: noise ("PDT: decrypt to %s" % outsspec)
                elif arg in [ "-f", "--force" ]:
                    PDTCRYPT_OVERWRITE = True
                    if PDTCRYPT_VERBOSE is True: noise ("PDT: overwrite existing files")
                else:
                    bail ("ERROR: unexpected positional argument “%s”" % arg)

    if secret is None:
        if PDTCRYPT_VERBOSE is True:
            noise ("ERROR: no password or key specified, trying $PDTCRYPT_PASSWORD")
        epw = os.getenv ("PDTCRYPT_PASSWORD")
        if epw is not None:
            checked_secret ("$PDTCRYPT_PASSWORD",
                            make_secret (password=epw.strip ()))

    if secret is None:
        if PDTCRYPT_VERBOSE is True:
            noise ("ERROR: no password or key specified, trying $PDTCRYPT_KEY")
        ek = os.getenv ("PDTCRYPT_KEY")
        if ek is not None:
            checked_secret ("$PDTCRYPT_KEY", make_secret (key=ek.strip ()))

    if secret is None:
        if subcommand == PDTCRYPT_SUB_IVCHECK:
            pass # IVs are read from the headers; no secret involved
        elif subcommand == PDTCRYPT_SUB_SCRYPT:
            bail ("ERROR: scrypt hash mode requested but no password given")
        elif mode & PDTCRYPT_DECRYPT:
            bail ("ERROR: decryption requested but no password given")

    if mode & PDTCRYPT_SPLIT and outsspec is None:
        bail ("ERROR: split mode is incompatible with stdout sink "
              "(the default)")

    if subcommand == PDTCRYPT_SUB_SCAN and outsspec is None:
        pass # no output by default in scan mode
    elif mode & PDTCRYPT_SPLIT or subcommand == PDTCRYPT_SUB_SCAN:
        # destination must be directory
        if outsspec == "-":
            bail ("ERROR: mode is incompatible with stdout sink")
        try:
            try:
                os.makedirs (outsspec, 0o700)
            except FileExistsError:
                # if it’s a directory with appropriate perms, everything is
                # good; otherwise, below invocation of open(2) will fail
                pass
            outs = os.open (outsspec, os.O_DIRECTORY, 0o600)
        except FileNotFoundError as exn:
            bail ("ERROR: cannot create target directory “%s”" % outsspec)
        except NotADirectoryError as exn:
            bail ("ERROR: target path “%s” is not a directory" % outsspec)
    else:
        outs = deptdcrypt_mk_stream (PDTCRYPT_SINK, outsspec or "-")

    if subcommand == PDTCRYPT_SUB_SCAN:
        if insspec is None:
            bail ("ERROR: please supply an input file for scanning")
        if insspec == '-':
            bail ("ERROR: input must be seekable; please specify a file")
        return True, partial (mode_scan, secret, insspec, outs, nacl=nacl)

    if subcommand == PDTCRYPT_SUB_IVCHECK:
        if insspec is None:
            bail ("ERROR: please supply an input file for checking ivs")

    if subcommand == PDTCRYPT_SUB_SCRYPT:
        if secret [0] == PDTCRYPT_SECRET_KEY:
            bail ("ERROR: scrypt mode requires a password")
        if insspec is not None and nacl is not None \
                or insspec is None and nacl is None :
            bail ("ERROR: please supply either an input file or "
                  "the salt")

    # default to stdin
    ins = None
    if insspec is not None or subcommand != PDTCRYPT_SUB_SCRYPT:
        ins = deptdcrypt_mk_stream (PDTCRYPT_SOURCE, insspec or "-")

    if subcommand == PDTCRYPT_SUB_IVCHECK:
        return True, partial (mode_ivcheck, ins)

    if subcommand == PDTCRYPT_SUB_SCRYPT:
        return True, partial (mode_scrypt, secret [1].encode (), ins, nacl,
                              fmt=scrypt_format)

    return True, partial (mode_depdtcrypt, mode, secret, ins, outs)
| 2395 | |
| 2396 | |
def main (argv):
    """
    Entry point of the command line interface: have ``parse_argv`` digest
    *argv* and return the result of the runner it prepared, or 1 if parsing
    did not succeed.
    """
    ok, runner = parse_argv (argv)

    if ok is not True:
        return 1

    return runner ()
| 2403 | |
| 2404 | |
# Executed as a script: the result of main becomes the exit status.
if __name__ == "__main__":
    sys.exit (main (sys.argv))
| 2407 | |