enable strict IV checking by default during decryption
[python-delta-tar] / deltatar / deltatar.py
6b2fa38f 1#!/usr/bin/env python3
0708a374 2
51797cd6 3# Copyright (C) 2013, 2014 Intra2net AG
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; either version 3 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU Lesser General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see
17# <http://www.gnu.org/licenses/lgpl-3.0.html>
18
19DELTATAR_HEADER_VERSION = 1
20DELTATAR_PARAMETER_VERSION = 1
3fdea6d4 21
22import logging
23import datetime
6c678f3a 24import binascii
938c2d54 25import io
0501fe0a 26import operator
0708a374 27import os
0501fe0a 28import copy
82de3376 29import shutil
8a8fadda 30import re
31import stat
32import json
c9ee0159 33import typing
34from functools import partial
35
36from . import tarfile
2ae46844 37from . import crypto
0708a374 38
39class NullHandler(logging.Handler):
40 def emit(self, record):
41 pass
42
43
44logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
45
46
47# match mode
48NO_MATCH = False
49MATCH = True
50PARENT_MATCH = 2
51
52# encryption direction
53CRYPTO_MODE_ENCRYPT = 0
54CRYPTO_MODE_DECRYPT = 1
55
56# The canonical extension for encrypted backup files regardless of the actual
57# encryption parameters is “.pdtcrypt”. This is analogous to the encryption
58# header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
59# Since the introduction of the versioned header there is no longer any need
60# for encoding encryption parameters in the file extensions (“.aes128” and
61# suchlike).
62PDTCRYPT_EXTENSION = "pdtcrypt"
63PDT_TYPE_ARCHIVE = 0
64PDT_TYPE_AUX = 1
13cc7dfc 65
66AUXILIARY_FILE_INDEX = 0
67AUXILIARY_FILE_INFO = 1
68
69class DeltaTar(object):
70 '''
71 Backup class used to create backups
72 '''
73
74 # list of files to exclude in the backup creation or restore operation. It
75 # can contain python regular expressions.
76 excluded_files = []
77
78 # list of files to include in the backup creation or restore operation. It
79 # can contain python regular expressions. If empty, all files in the source
80 # path will be backed up (when creating a backup) or all the files in the
a83fa4ed 81 # backup will be restored (when restoring a backup), but if included_files
82 # is set then only the files included in the list will be processed.
83 included_files = []
84
85 # custom filter of files to be backed up (or restored). Unused and unset
86 # by default. The function receives a file path and must return a boolean.
87 filter_func = None
88
89 # mode in which the delta will be created (when creating a backup) or
90 # opened (when restoring). Accepts the same modes as the tarfile library.
91 mode = ""
92
93 # used together with aes modes to encrypt and decrypt backups.
94 password = None
95 crypto_key = None
96 nacl = None
0708a374 97
98 # parameter version to use when encrypting; note that this has no effect
99 # on decryption since the required settings are determined from the headers
54f909ca 100 crypto_version = DELTATAR_HEADER_VERSION
101 crypto_paramversion = None
102
133d30da 103 # when encrypting or decrypting, these hold crypto handlers; created before
2ae46844 104 # establishing the Tarfile stream iff a password is supplied.
105 encryptor = None
106 decryptor = None
2ae46844 107
108 # python logger object.
109 logger = None
110
111 # specifies the index mode in the same format as @param mode, but without
112 # the ':', '|' or '#' at the beginning. It doesn't make sense to specify
2ae46844 113 # that the index is encrypted if no password is given in the constructor.
3a7e1a50 114 index_mode = None
115
116 # current time for this backup. Used for file names and file creation checks
117 current_time = None
118
119 # extra data to be included in the header of the index file when creating a
120 # backup
121 extra_data = dict()
122
123 # valid tarfile modes and their corresponding default file extension
124 __file_extensions_dict = {
125 '': '',
126 ':': '',
127 ':gz': '.gz',
128 ':bz2': '.bz2',
129 '|': '',
130 '|gz': '.gz',
131 '|bz2': '.bz2',
132 '#gz': '.gz',
133 '#gz.pdtcrypt': '.gz',
134 '#pdtcrypt': '',
d1c38f40 135 '#': '',
136 }
137
138 # valid index modes and their corresponding default file extension
139 __index_extensions_dict = {
140 '': '',
141 'gz': '.gz',
142 'bz2': '.bz2',
143 'gz.pdtcrypt': '.gz',
144 'pdtcrypt': '',
145 }
146
147 # valid path prefixes
148 __path_prefix_list = [
149 u'snapshot://',
150 u'list://',
151 u'delete://'
152 ]
153
0708a374 154 def __init__(self, excluded_files=[], included_files=[],
da26094a 155 filter_func=None, mode="", password=None,
1f3fd7b0 156 crypto_key=None, nacl=None,
54f909ca 157 crypto_version=DELTATAR_HEADER_VERSION,
dbee011c 158 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
3a7e1a50 159 logger=None, index_mode=None, index_name_func=None,
160 volume_name_func=None):
161 '''
162 Constructor. Configures the diff engine.
163
164 Parameters:
165 - excluded_files: list of files to exclude in the backup creation or
166 restore operation. It can contain python regular expressions.
167
168 - included_files: list of files to include in the backup creation or
169 restore operation. It can contain python regular expressions. If
170 empty, all files in the source path will be backed up (when creating a
171 backup) or all the files in the backup will be restored (when
a83fa4ed 172 restoring a backup), but if included_files is set then only the files
173 included in the list will be processed.
174
175 - filter_func: custom filter of files to be backed up (or restored).
176 Unused and unset by default. The function receives a file path and
177 must return a boolean.
178
179 - mode: mode in which the delta will be created (when creating a backup)
180 or opened (when restoring). Accepts the same modes as the tarfile
181 library. Valid modes are:
182
183 '' open uncompressed
184 ':' open uncompressed
185 ':gz' open with gzip compression
186 ':bz2' open with bzip2 compression
187 '|' open an uncompressed stream of tar blocks
188 '|gz' open a gzip compressed stream of tar blocks
189 '|bz2' open a bzip2 compressed stream of tar blocks
190 '#gz' open a stream of gzip compressed tar blocks
0708a374 191
192 - crypto_key: used to encrypt and decrypt backups. Encryption will
193 be enabled automatically if a key is supplied. Requires a salt to be
194 passed as well.
195
196 - nacl: salt that was used to derive the encryption key for embedding
197 in the PDTCRYPT header. Not needed when decrypting and when
198 encrypting with password.
199
200 - password: used to encrypt and decrypt backups. Encryption will be
201 enabled automatically if a password is supplied.
0708a374 202
203 - crypto_version: version of the format, determining the kind of PDT
204 object header.
205
206 - crypto_paramversion: optionally request encryption conforming to
207 a specific parameter version. Defaults to the standard PDT value
208 which as of 2017 is the only one available.
209
210 - logger: python logger object. Optional.
211
3a7e1a50 212 - index_mode: specifies the index mode in the same format as @param
213 mode, but without the ':', '|' or '#' at the beginning. If encryption
214 is requested it will extend to the auxiliary (index, info) files as
215 well. This is an optional parameter that will automatically mimic
216 @param mode by default if not provided. Valid modes are:
217
218 '' open uncompressed
219 'gz' open with gzip compression
220 'bz2' open with bzip2 compression
221
222 - index_name_func: function that sets a custom name for the index file.
223 This function receives a flag to indicate whether the name will be
224 used for a full or diff backup. The backup path will be prepended to
225 its return value.
226
227 - volume_name_func: function that defines the name of tar volumes. It
228 receives the backup_path, whether it is a full backup, and the volume
229 number, and must return the name for the corresponding volume. Optional,
230 DeltaTar has default names for tar volumes.
231 '''
232
da26094a 233 if mode not in self.__file_extensions_dict:
234 raise Exception('Unrecognized extension mode=[%s] requested for files'
235 % str(mode))
236
237 self.excluded_files = excluded_files
238 self.included_files = included_files
239 self.filter_func = filter_func
240 self.logger = logging.getLogger('deltatar.DeltaTar')
241 if logger:
242 self.logger.addHandler(logger)
243 self.mode = mode
2ae46844 244
245 if crypto_key is not None:
246 self.crypto_key = crypto_key
247 self.nacl = nacl # encryption only
248
249 if password is not None:
250 self.password = password
3a7e1a50 251
252 if crypto_version is not None:
253 self.crypto_version = crypto_version
254
255 if crypto_paramversion is not None:
256 self.crypto_paramversion = crypto_paramversion
257
258 # generate index_mode
259 if index_mode is None:
260 index_mode = ''
6e99d23a 261 if 'gz' in mode:
262 index_mode = "gz"
263 elif 'bz2' in mode:
264 index_mode = "bz2"
265 elif mode not in self.__index_extensions_dict:
266 raise Exception('Unrecognized extension mode=[%s] requested for index'
267 % str(mode))
268
269 self.index_mode = index_mode
270 self.current_time = datetime.datetime.now()
271
272 if index_name_func is not None:
273 self.index_name_func = index_name_func
274
275 if volume_name_func is not None:
276 self.volume_name_func = volume_name_func
277
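    # Usage sketch (illustrative only; the paths, password and volume size
    # below are made up for the example and are not part of the API):
    #
    #   from deltatar.deltatar import DeltaTar
    #
    #   dtar = DeltaTar(mode='#gz', password='example-password')
    #   dtar.create_full_backup(source_path='src',
    #                           backup_path='backup',
    #                           max_volume_size=50)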
e54cfec5 278 def pick_extension(self, kind, mode=None):
279 """
280 Choose the extension depending on a) the kind of file given, b) the
281 processing mode, and c) the current encryption settings.
282 """
283 ret = ""
284 if kind == PDT_TYPE_ARCHIVE:
285 ret += ".tar"
286 if mode is None:
287 mode = self.__index_extensions_dict [self.index_mode]
2cdd9faf 288 ret += mode
a83fa4ed 289 if self.crypto_key is not None or self.password is not None:
290 ret += "." + PDTCRYPT_EXTENSION
291 return ret
292
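    # For instance (illustrative): with self.mode == '#gz' and a password or
    # crypto_key configured, pick_extension(PDT_TYPE_ARCHIVE, '.gz') yields
    # ".tar.gz.pdtcrypt" and pick_extension(PDT_TYPE_AUX, '.gz') yields
    # ".gz.pdtcrypt"; without encryption the ".pdtcrypt" suffix is omitted.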
f0287fb7 293 def index_name_func(self, is_full): # pylint: disable=method-hidden
0708a374 294 '''
295 Callback for setting a custom name for the index file. Depending on
296 whether *is_full* is set, it will create a suitable name for a full
297 or a diff backup.
298 '''
299 prefix = "bfull" if is_full else "bdiff"
f7940c31 300 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
301 extension = self.pick_extension \
302 (PDT_TYPE_AUX,
303 self.__index_extensions_dict [self.index_mode])
0708a374 304
da26094a 305 return "%s-%s.index%s" % (prefix, date_str, extension)
0708a374 306
307 def volume_name_func(self, backup_path, # pylint: disable=method-hidden
308 is_full, volume_number,
309 guess_name=False):
310 '''
311 function that defines the name of tar volumes. It receives the
312 backup_path, if it's a full backup and the volume number, and must return
313 the name for the corresponding volume name. Optional, DeltaTar has default
314 names for tar volumes.
315
316 If guess_name is activated, the file is intended not to be created but
317 to be found, and thus the date will be guessed.
318 '''
319 prefix = "bfull" if is_full else "bdiff"
320 extension = self.pick_extension \
321 (PDT_TYPE_ARCHIVE,
322 self.__file_extensions_dict [self.mode])
0708a374 323
df86af81 324 if not guess_name:
f7940c31 325 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf 326 return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
327 else:
328 prefix = prefix + "-"
90b75470 329 postfix = "-%03d%s" % (volume_number + 1, extension)
330 for f in os.listdir(backup_path):
331 if f.startswith(prefix) and f.endswith(postfix):
332 return f
333 raise Exception("volume not found")
334
0708a374 335
974408b5 336 def filter_path(self, path, source_path="", is_dir=None):
337 '''
338 Filters a path, given the source_path, using the filtering properties
339 set in the constructor.
340 The filtering order is:
341 1. included_files (if any)
342 2. excluded_files
343 3. filter_func (which must return whether the file is accepted or not)
344 '''
75059f3c 345
c1af2184 346 if len(source_path) > 0:
347 # ensure that exactly one '/' at end of dir is also removed
348 source_path = source_path.rstrip(os.sep) + os.sep
349 path = path[len(source_path):]
350
351 # 1. filter included_files
974408b5 352 match = MATCH
8a8fadda 353 if len(self.included_files) > 0:
974408b5 354 match = NO_MATCH
355 for i in self.included_files:
356 # it can be either a regexp or a string
be60ffd0 357 if isinstance(i, str):
358 # if the string matches, then continue
359 if i == path:
974408b5 360 match = MATCH
c1af2184 361 break
362
363 # if the string ends with / it's a directory, and if the
7b07645e 364 # path is contained in it, it is included
c1af2184 365 if i.endswith('/') and path.startswith(i):
974408b5 366 match = MATCH
c1af2184 367 break
368
369 # if the string doesn't end with /, add it and do the same
370 # check
c1af2184 371 elif path.startswith(i + '/'):
974408b5 372 match = MATCH
c1af2184 373 break
8a8fadda 374
375 # check for PARENT_MATCH
376 if is_dir:
377 dir_path = path
378 if not dir_path.endswith('/'):
379 dir_path += '/'
380
381 if i.startswith(dir_path):
382 match = PARENT_MATCH
383
8a8fadda 384 # if it's a reg exp, then we just check if it matches
c9ee0159 385 elif isinstance(i, typing.Pattern):
c1af2184 386 if i.match(path):
974408b5 387 match = MATCH
c1af2184 388 break
8a8fadda 389 else:
4bda6f45 390 self.logger.warning('Invalid pattern in included_files: %s' % str(i))
8a8fadda 391
392 if match == NO_MATCH:
393 return NO_MATCH
c1af2184 394
395 # when a directory is in PARENT_MATCH, it doesn't matter if it's
396 # excluded. Its subfiles will be excluded, but the directory itself
397 # won't
398 if match != PARENT_MATCH:
399 for e in self.excluded_files:
400 # it can be either a regexp or a string
be60ffd0 401 if isinstance(e, str):
8a8fadda 402 # if the string matches, then exclude
c1af2184 403 if e == path:
974408b5 404 return NO_MATCH
405
406 # if the string ends with / it's a directory, and if the
407 # path starts with the directory, then exclude
c1af2184 408 if e.endswith('/') and path.startswith(e):
974408b5 409 return NO_MATCH
410
411 # if the string doesn't end with /, do the same check with
412 # the slash added
c1af2184 413 elif path.startswith(e + '/'):
974408b5 414 return NO_MATCH
415
416 # if it's a reg exp, then we just check if it matches
c9ee0159 417 elif isinstance(e, typing.Pattern):
c1af2184 418 if e.match(path):
974408b5 419 return NO_MATCH
8a8fadda 420 else:
4bda6f45 421 self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
422
423 if self.filter_func:
424 return self.filter_func(path)
425
974408b5 426 return match
8a8fadda 427
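    # Sketch of the filter_path() precedence with hypothetical settings (the
    # file names below are invented for illustration):
    #
    #   dtar = DeltaTar(mode='',
    #                   included_files=['data/'],
    #                   excluded_files=[re.compile(r'.*\.tmp$')])
    #   dtar.filter_path('data/a.txt')          # MATCH
    #   dtar.filter_path('data/a.tmp')          # NO_MATCH (excluded)
    #   dtar.filter_path('other/b.txt')         # NO_MATCH (not included)
    #   dtar.filter_path('data', is_dir=True)   # PARENT_MATCH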
283fbd5e 428 def _recursive_walk_dir(self, source_path, keep_base_dir=False):
429 '''
430 Walk a directory recursively, yielding each file/directory
431
432 Returns the path of an entity. If ``keep_base_dir`` is set,
433 the path returned contains the prefix ``source_path``; otherwise it is
434 relative to the prefix.
435 '''
436
283fbd5e 437 source_path = source_path.rstrip(os.sep)
0708a374 438
283fbd5e 439 if keep_base_dir:
adf7dac4 440 beginning_size = 0
441 else:
442 beginning_size = len(source_path) + 1 # +1 for os.sep
443
444 queue = [source_path]
445
d07c8065 446 while queue:
df86af81 447 cur_path = queue.pop(0)
0708a374 448
449 try:
450 dfd = os.open (cur_path, os.O_DIRECTORY)
451 except FileNotFoundError as exn:
452 self.logger.warning ("failed to open entity [%s] as directory; "
453 "file system (error: %s); skipping"
454 % (cur_path, str (exn)))
455 continue
456
457 try:
458 for filename in sorted(os.listdir(dfd)):
459 child = os.path.join(cur_path, filename)
460 is_dir = os.path.isdir(child)
461 status = self.filter_path(child, source_path, is_dir)
462 if status == NO_MATCH:
463 continue
464 if not os.access(child, os.R_OK):
465 self.logger.warning('Error accessing possibly locked file %s' % child)
466 continue
467
468 if status == MATCH:
469 yield child[beginning_size:]
470
471 if is_dir and (status == MATCH or status == PARENT_MATCH):
472 queue.append(child)
473 finally:
474 os.close (dfd)
0708a374 475
476 def _stat_dict(self, path):
477 '''
478 Returns a dict with the stat data used to compare files
479 '''
480 stinfo = os.stat(path)
481 mode = stinfo.st_mode
482
483 ptype = None
484 if stat.S_ISDIR(mode):
d07c8065 485 ptype = u'directory'
e82f14f5 486 elif stat.S_ISREG(mode):
d07c8065 487 ptype = u'file'
e82f14f5 488 elif stat.S_ISLNK(mode):
d07c8065 489 ptype = u'link'
490
491 return {
d07c8065 492 u'type': ptype,
be60ffd0 493 u'path': path,
d07c8065 494 u'mode': mode,
495 u'mtime': int(stinfo.st_mtime),
496 u'ctime': int(stinfo.st_ctime),
497 u'uid': stinfo.st_uid,
498 u'gid': stinfo.st_gid,
499 u'inode': stinfo.st_ino,
500 u'size': stinfo.st_size
501 }
502
df99a044 503 def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
504 '''
505 Return whether the dicts are equal in the stat keys
506 '''
fc8fdcbc 507 keys = [u'type', u'mode',u'size', u'mtime',
d041935c 508 # not restored: u'inode', u'ctime'
df99a044 509 ]
8adbe50d 510
fc8fdcbc 511 # only check gid/uid if the user is root; otherwise skip the check,
d041935c 512 # because tarfile can only chown when running as superuser
513 #
514 # also, skip the check in rpmbuild since the sources end up with the
515 # uid:gid of the packager while the extracted files are 0:0.
516 if hasattr(os, "geteuid") and os.geteuid() == 0 \
517 and os.getenv ("RPMBUILD_OPTIONS") is None:
518 keys.append('gid')
519 keys.append('uid')
520
ea6d3c3e 521 if (not d1 and d2 != None) or (d1 != None and not d2):
522 return False
523
524 if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
525 return False
8adbe50d 526
527 type = d1.get('type', '')
528
d07c8065 529 for key in keys:
530 # size doesn't matter for directories
531 if type == 'directory' and key == 'size':
532 continue
533 if d1.get(key, -1) != d2.get(key, -2):
534 return False
535 return True
536
df99a044 537 def prefixed(self, path, listsnapshot_equal=False):
538 '''
539 if a path is not prefixed, return it prefixed
540 '''
541 for prefix in self.__path_prefix_list:
542 if path.startswith(prefix):
543 if listsnapshot_equal and prefix == u'list://':
544 return u'snapshot://' + path[len(prefix):]
545 return path
546 return u'snapshot://' + path
547
548 def unprefixed(self, path):
549 '''
550 remove a path prefix if any
551 '''
552 for prefix in self.__path_prefix_list:
553 if path.startswith(prefix):
554 return path[len(prefix):]
555 return path
556
133d30da 557
558 def initialize_encryption (self, mode, strict_validation=True):
559 """
560 :type strict_validation: bool
561 :param strict_validation: Enable strict IV checking in the crypto
562 layer. Should be disabled when dealing with
563 potentially corrupted data.
564 """
133d30da 565 password = self.password
566 key = self.crypto_key
567 nacl = self.nacl
133d30da 568
1f3fd7b0 569 if key is None and password is None:
570 return
571 if mode == CRYPTO_MODE_ENCRYPT:
572 return crypto.Encrypt (password=password,
573 key=key,
574 nacl=nacl,
54f909ca 575 version=self.crypto_version,
774ca538 576 paramversion=self.crypto_paramversion)
133d30da 577 if mode == CRYPTO_MODE_DECRYPT:
578 return crypto.Decrypt (password=password, key=key,
579 strict_ivs=strict_validation)
580
581 raise Exception ("invalid encryption mode [%r]" % mode)
582
583
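    # Sketch of how initialize_encryption() is used (illustrative only, with
    # a made-up password): the returned objects are the crypto handlers that
    # get passed to the tarfile layer as `encryption`.
    #
    #   dtar = DeltaTar(mode='#gz', password='example-password')
    #   enc = dtar.initialize_encryption(CRYPTO_MODE_ENCRYPT)
    #   dec = dtar.initialize_encryption(CRYPTO_MODE_DECRYPT,
    #                                    strict_validation=False)
    #
    # Without a password or crypto_key both calls return None and the
    # archives are processed unencrypted.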
584 def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX,
585 strict_validation=True):
3a7e1a50 586 '''
587 Given the specified configuration, opens a file for reading or writing,
588 inheriting the encryption and compression settings from the backup.
589 Returns a file object ready to use.
3fdea6d4 590
591 :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
592 respectively).
593 :type mode: str
594 :param kind: Role of the file, see AUXILIARY_FILE_* constants.
595 Both the info file and the index file have a globally
596 unique, constant counter value.
3fdea6d4 597 :type kind: int
3a7e1a50 598 '''
599 if self.index_mode.startswith('gz'):
600 comptype = 'gz'
601 elif self.index_mode.startswith('bz2'):
602 comptype = 'bz2'
603 else:
604 comptype = 'tar'
605
133d30da 606 crypto_ctx = None
6de9444a 607 enccounter = None
133d30da 608 if mode == "w":
774ca538 609 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 610 elif mode == "r":
611 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT,
612 strict_validation=strict_validation)
133d30da 613
614 if crypto_ctx is not None:
615 if kind == AUXILIARY_FILE_INFO:
616 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
617 elif kind == AUXILIARY_FILE_INDEX:
618 enccounter = crypto.AES_GCM_IV_CNT_INDEX
619 else:
620 raise Exception ("invalid kind of aux file %r" % kind)
621
c8c72fe1 622 sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
3fdea6d4 623 bufsize=tarfile.RECORDSIZE, fileobj=None,
6de9444a 624 encryption=crypto_ctx, enccounter=enccounter)
625
626 return sink
627
3a7e1a50 628
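    # Example sketch for open_auxiliary_file() (the path is hypothetical):
    # write an index file that inherits the backup's compression and
    # encryption settings, then close it.
    #
    #   index_sink = dtar.open_auxiliary_file('backup/bfull-index.gz', 'w',
    #                                         kind=AUXILIARY_FILE_INDEX)
    #   index_sink.write(b'{"type": "BEGIN-FILE-LIST"}\n')
    #   index_sink.close(close_fileobj=True)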
0708a374 629 def create_full_backup(self, source_path, backup_path,
d4a05db6 630 max_volume_size=None, extra_data=dict()):
631 '''
632 Creates a full backup.
633
634 Parameters:
635 - source_path: source path to the directory to back up.
636 - backup_path: path where the back up will be stored. Backup path will
637 be created if not existent.
638 - max_volume_size: maximum volume size in megabytes. Used to split the
639 backup in volumes. Optional (won't split in volumes by default).
640 - extra_data: a json-serializable dictionary with information that you
641 want to be included in the header of the index file
642 '''
643 # check input
be60ffd0 644 if not isinstance(source_path, str):
645 raise Exception('Source path must be a string')
646
be60ffd0 647 if not isinstance(backup_path, str):
648 raise Exception('Backup path must be a string')
649
650 if not os.path.exists(source_path) or not os.path.isdir(source_path):
651 raise Exception('Source path "%s" does not exist or is not a '\
652 'directory' % source_path)
653
654 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
655 max_volume_size < 1):
656 raise Exception('max_volume_size must be a positive integer')
657 if max_volume_size != None:
658 max_volume_size = max_volume_size*1024*1024
659
660 if not isinstance(extra_data, dict):
661 raise Exception('extra_data must be a dictionary')
662
663 try:
664 extra_data_str = json.dumps(extra_data)
665 except:
666 raise Exception('extra_data is not json-serializable')
667
668 if not os.access(source_path, os.R_OK):
669 raise Exception('Source path "%s" is not readable' % source_path)
670
671 # try to create backup path if needed
37ab0f57 672 os.makedirs(backup_path, exist_ok=True)
673
674 if not os.access(backup_path, os.W_OK):
675 raise Exception('Backup path "%s" is not writeable' % backup_path)
676
677 if source_path.endswith('/'):
678 source_path = source_path[:-1]
679
680 if backup_path.endswith('/'):
681 backup_path = backup_path[:-1]
682
683 # update current time
684 self.current_time = datetime.datetime.now()
685
686 if self.mode not in self.__file_extensions_dict:
687 raise Exception('Unrecognized extension')
688
2ae46844 689 # setup for encrypting payload
690 if self.encryptor is None:
691 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
2ae46844 692
0708a374 693 # some initialization
11684b1d 694 self.vol_no = 0
695
696 # generate the first volume name
697 vol_name = self.volume_name_func(backup_path, True, 0)
698 tarfile_path = os.path.join(backup_path, vol_name)
699
700 # init index
701 index_name = self.index_name_func(True)
702 index_path = os.path.join(backup_path, index_name)
703 index_sink = self.open_auxiliary_file(index_path, 'w')
e82f14f5 704
705 cwd = os.getcwd()
706
b7c47f38 707 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
708 '''
709 Handles the new volumes
710 '''
711 volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
712 volume_path = os.path.join(backup_path, volume_name)
11684b1d 713 deltarobj.vol_no = volume_number
714
715 # we convert relative paths into absolute because CWD is changed
716 if not os.path.isabs(volume_path):
717 volume_path = os.path.join(cwd, volume_path)
11684b1d 718
719 if tarobj.fileobj is not None:
720 tarobj.fileobj.close()
721
722 deltarobj.logger.debug("opening volume %s" % volume_path)
723
b7c47f38 724 tarobj.open_volume(volume_path, encryption=encryption)
725
726 # wraps some args from context into the handler
133d30da 727 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
0708a374 728
774ca538 729 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
6c678f3a 730
be60ffd0 731 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
6c678f3a 732 # calculate checksum and write into the stream
c2ffe2ec 733 crc = binascii.crc32(s) & 0xFFFFffff
774ca538 734 index_sink.write(s)
e82f14f5 735
736 # start creating the tarfile
737 tarobj = tarfile.TarFile.open(tarfile_path,
da26094a 738 mode='w' + self.mode,
0708a374 739 format=tarfile.GNU_FORMAT,
d1c38f40 740 concat='#' in self.mode,
133d30da 741 encryption=self.encryptor,
0708a374 742 max_volume_size=max_volume_size,
ea625b04 743 new_volume_handler=new_volume_handler,
744 save_to_members=False,
745 dereference=True)
e5c6ca04 746 os.chdir(source_path)
747
748 # for each file to be in the backup, do:
e82f14f5 749 for path in self._recursive_walk_dir('.'):
750
751 try: # backup file
752 # calculate stat dict for current file
753 statd = self._stat_dict(path)
754 statd['path'] = u'snapshot://' + statd['path']
755 statd['volume'] = self.vol_no
756
757 # backup file
758 tarobj.add(path, arcname = statd['path'], recursive=False)
759 except FileNotFoundError as exn:
760 # file vanished since the call to access(3) above
761 self.logger.warning ("object [%s] no longer available in "
762 "file system (error: %s); skipping"
763 % (path, str (exn)))
764 continue # prevent indexing
11684b1d 765
55b8686d 766 # retrieve file offset
253d4cdd 767 statd['offset'] = tarobj.get_last_member_offset()
b008f989 768 self.logger.debug("backup %s" % statd['path'])
6c678f3a 769
d041935c 770 # store the stat dict in the index
be60ffd0 771 s = bytes(json.dumps(statd) + '\n', 'UTF-8')
6c678f3a 772 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 773 index_sink.write(s)
e82f14f5 774
be60ffd0 775 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
6c678f3a 776 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 777 index_sink.write(s)
be60ffd0 778 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
779 index_sink.write(s)
780
e5c6ca04 781 os.chdir(cwd)
0708a374 782 tarobj.close()
c8c72fe1 783 index_sink.close (close_fileobj=True)
938c2d54 784
0708a374 785 def create_diff_backup(self, source_path, backup_path, previous_index_path,
d4a05db6 786 max_volume_size=None, extra_data=dict()):
787 '''
788 Creates a backup.
789
790 Parameters:
791 - source_path: source path to the directory to back up.
792 - backup_path: path where the back up will be stored. Backup path will
793 be created if not existent.
794 - previous_index_path: index of the previous backup, needed to know
795 which files changed since then.
796 - max_volume_size: maximum volume size in megabytes (MB). Used to split
797 the backup in volumes. Optional (won't split in volumes by default).
798
799 NOTE: previous index is assumed to follow exactly the same format as
800 the index_mode setup in the constructor.
0708a374 801 '''
d07c8065 802 # check/sanitize input
be60ffd0 803 if not isinstance(source_path, str):
804 raise Exception('Source path must be a string')
805
be60ffd0 806 if not isinstance(backup_path, str):
807 raise Exception('Backup path must be a string')
808
809 if not os.path.exists(source_path) or not os.path.isdir(source_path):
810 raise Exception('Source path "%s" does not exist or is not a '\
811 'directory' % source_path)
812
813 if not isinstance(extra_data, dict):
814 raise Exception('extra_data must be a dictionary')
815
816 try:
817 extra_data_str = json.dumps(extra_data)
818 except:
819 raise Exception('extra_data is not json-serializable')
820
821 if not os.access(source_path, os.R_OK):
822 raise Exception('Source path "%s" is not readable' % source_path)
823
824 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
825 max_volume_size < 1):
826 raise Exception('max_volume_size must be a positive integer')
827 if max_volume_size != None:
828 max_volume_size = max_volume_size*1024*1024
829
be60ffd0 830 if not isinstance(previous_index_path, str):
d07c8065
ERE
831 raise Exception('previous_index_path must be A string')
832
833 if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
834 raise Exception('Index path "%s" does not exist or is not a '\
835 'file' % previous_index_path)
836
837 if not os.access(previous_index_path, os.R_OK):
838 raise Exception('Index path "%s" is not readable' % previous_index_path)
839
840 # try to create backup path if needed
37ab0f57 841 os.makedirs(backup_path, exist_ok=True)
842
843 if not os.access(backup_path, os.W_OK):
844 raise Exception('Backup path "%s" is not writeable' % backup_path)
845
846 if source_path.endswith('/'):
847 source_path = source_path[:-1]
848
849 if backup_path.endswith('/'):
850 backup_path = backup_path[:-1]
851
852 # update current time
853 self.current_time = datetime.datetime.now()
854
855 if self.mode not in self.__file_extensions_dict:
856 raise Exception('Unrecognized extension')
857
2ae46844 858 # setup for encrypting payload
859 if self.encryptor is None:
860 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 861
862 # some initialization
863 self.vol_no = 0
864
865 # generate the first volume name
866 vol_name = self.volume_name_func(backup_path, is_full=False,
867 volume_number=0)
868 tarfile_path = os.path.join(backup_path, vol_name)
869
938c2d54 870 # init index
871 cwd = os.getcwd()
872
873 index_name = self.index_name_func(is_full=False)
874 index_path = os.path.join(backup_path, index_name)
875 index_sink = self.open_auxiliary_file(index_path, 'w')
876
877 def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
878 '''
879 Handles the new volumes
880 '''
881 volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
882 volume_number=volume_number)
883 volume_path = os.path.join(backup_path, volume_name)
884 deltarobj.vol_no = volume_number
885
886 # we convert relative paths into absolute because CWD is changed
887 if not os.path.isabs(volume_path):
888 volume_path = os.path.join(cwd, volume_path)
889
f624ff3d 890 deltarobj.logger.debug("opening volume %s" % volume_path)
891 tarobj.open_volume(volume_path)
892
893 # wraps some args from context into the handler
894 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
895
3031b7ae 896 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
d07c8065 897
be60ffd0 898 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
d07c8065 899 # calculate checksum and write into the stream
c2ffe2ec 900 crc = binascii.crc32(s) & 0xFFFFffff
3031b7ae 901 index_sink.write(s)
902
903 # start creating the tarfile
904 tarobj = tarfile.TarFile.open(tarfile_path,
905 mode='w' + self.mode,
906 format=tarfile.GNU_FORMAT,
d1c38f40 907 concat='#' in self.mode,
133d30da 908 encryption=self.encryptor,
d07c8065 909 max_volume_size=max_volume_size,
ea625b04 910 new_volume_handler=new_volume_handler,
911 save_to_members=False,
912 dereference=True)
d07c8065 913
914
915 # create the iterators, first the previous index iterator, then the
916 # source path directory iterator and collate and iterate them
917 if not os.path.isabs(previous_index_path):
918 previous_index_path = os.path.join(cwd, previous_index_path)
919 index_it = self.iterate_index_path(previous_index_path)
920
d07c8065 921 os.chdir(source_path)
922 dir_it = self._recursive_walk_dir('.')
923 dir_path_it = self.jsonize_path_iterator(dir_it)
d07c8065 924
925 def pr(path):
926 if not path:
927 return "None"
928 else:
929 return path["path"]
8edb2e3c 930
d07c8065 931 # for each file to be in the backup, do:
df86af81 932 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
933 action = None
934 # if file is not in the index, it means it's a new file, so we have
935 # to take a snapshot
df86af81 936
937 if not ipath:
938 action = 'snapshot'
939 # if the file is not in the directory iterator, it means that it has
d041935c 940 # been deleted, so we need to mark it as such
941 elif not dpath:
942 action = 'delete'
943 # if the file is in both iterators, it means it might have either
944 # not changed (in which case we will just list it in our index but
945 # it will not be included in the tar file), or it might have
e8d95fe5 946 # changed, in which case we will snapshot it.
947 elif ipath and dpath:
948 if self._equal_stat_dicts(ipath, dpath):
949 action = 'list'
950 else:
951 action = 'snapshot'
952 # TODO: when creating chained backups (i.e. diffing from another
953 # diff), we will need to detect the type of action in the previous
954 # index, because if it was delete and dpath is None, we should
955 # discard the file
956
957 if action == 'snapshot':
958 # calculate stat dict for current file
959 stat = dpath.copy()
be60ffd0 960 stat['path'] = "snapshot://" + dpath['path']
961 stat['volume'] = self.vol_no
962
963 self.logger.debug("[STORE] %s" % dpath['path'])
964
965 try: # backup file
966 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
967 # retrieve file offset
968 stat['offset'] = tarobj.get_last_member_offset()
969 except FileNotFoundError as exn:
970 # file vanished since the call to access(3) above
971 self.logger.warning ("object [%s] no longer available in "
972 "file system (error: %s); skipping"
973 % (dpath ["path"], str (exn)))
974 stat = None # prevent indexing
aae127d0 975
aae127d0 976 elif action == 'delete':
50f43227 977 path = self.unprefixed(ipath['path'])
aae127d0 978 stat = {
50f43227 979 u'path': u'delete://' + path,
980 u'type': ipath['type']
981 }
50f43227 982 self.logger.debug("[DELETE] %s" % path)
983
984 # mark it as deleted in the backup
42d39ca7 985 tarobj.add("/dev/null", arcname=stat['path'])
986 elif action == 'list':
987 stat = dpath.copy()
988 path = self.unprefixed(ipath['path'])
989 stat['path'] = u'list://' + path
aae127d0 990 # unchanged files do not enter in the backup, only in the index
50f43227 991 self.logger.debug("[UNCHANGED] %s" % path)
992 else:
993 # should not happen
4bda6f45 994 self.logger.warning('unknown action in create_diff_backup: {0}'
995 ''.format(action))
996 stat = None
aae127d0 997
998 if stat:
999 # store the stat dict in the index
be60ffd0 1000 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
aae127d0 1001 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 1002 index_sink.write(s)
aae127d0 1003
be60ffd0 1004 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
aae127d0 1005 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 1006 index_sink.write(s)
be60ffd0 1007 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
3031b7ae 1008 index_sink.write(s)
938c2d54 1009
df86af81 1010 index_it.release()
1011 os.chdir(cwd)
1012 tarobj.close()
1013 index_sink.close()
1014
1015
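    # Workflow sketch (illustrative paths; the actual index file name is
    # produced by index_name_func): a diff backup is created against the
    # index written by the previous full backup.
    #
    #   dtar = DeltaTar(mode='#gz', password='example-password')
    #   dtar.create_full_backup('src', 'backup-full')
    #   # ... later, after 'src' has changed:
    #   dtar.create_diff_backup('src', 'backup-diff',
    #                           'backup-full/<previous index file>')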
b750b280 1016 def iterate_index_path(self, index_path, strict_validation=True):
1017 '''
1018 Returns an index iterator. Internally, it uses a classic iterator class.
1019 We do that instead of just yielding so that the iterator object can have
1020 an additional function to close the file descriptor that is opened in
1021 the constructor.
1022 '''
d07c8065 1023
1024 class IndexPathIterator(object):
1025 def __init__(self, delta_tar, index_path):
1026 self.delta_tar = delta_tar
1027 self.index_path = index_path
1028 self.f = None
9eae9a1f 1029 self.extra_data = dict()
df86af81 1030 self.__enter__()
d07c8065 1031
1032 def __iter__(self):
1033 return self
d07c8065 1034
1035 def release(self):
1036 if self.f:
1037 self.f.close()
1038
1039 def __enter__(self):
1040 '''
1041 Allows this iterator to be used with the "with" statement
1042 '''
1043 if self.f is None:
1044 self.f = self.delta_tar.open_auxiliary_file \
1045 (self.index_path,
1046 'r',
1047 strict_validation=strict_validation)
1048 # check index header
1049 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1050 if j.get("type", '') != 'python-delta-tar-index' or\
1051 j.get('version', -1) != 1:
1052 raise Exception("invalid index file format: %s" % json.dumps(j))
1053
1054 self.extra_data = j.get('extra_data', dict())
1055
1056 # find BEGIN-FILE-LIST, ignore other headers
1057 while True:
1058 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1059 if j.get('type', '') == 'BEGIN-FILE-LIST':
1060 break
1061 return self
1062
1063 def __exit__(self, type, value, tb):
1064 '''
1065 Allows this iterator to be used with the "with" statement
1066 '''
1067 if self.f:
1068 self.f.close()
df86af81 1069 self.f = None
d07c8065 1070
be60ffd0 1071 def __next__(self):
0349168a 1072 # read each file in the index and process it to do the restore
1073 j = {}
1074 l_no = -1
1075 try:
1076 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
be60ffd0 1077 except Exception as e:
1078 if self.f:
1079 self.f.close()
1080 raise e
d07c8065 1081
df86af81 1082 op_type = j.get('type', '')
d07c8065 1083
1084 # when we detect the end of the list, break the loop
1085 if op_type == 'END-FILE-LIST':
1086 if self.f:
1087 self.f.close()
1088 raise StopIteration
1089
1090 # check input
1091 if op_type not in ['directory', 'file', 'link']:
4bda6f45 1092 self.delta_tar.logger.warning('unrecognized type to be '
1093 'restored: %s, line %d' % (op_type, l_no))
1094 # iterate again
be60ffd0 1095 return self.__next__()
1096
1097 return j, l_no
d07c8065 1098
df86af81 1099 return IndexPathIterator(self, index_path)
d07c8065 1100
26fdd428 1101 def iterate_tar_path(self, tar_path, new_volume_handler=None):
1102 '''
1103 Returns a tar iterator that iterates jsonized member items that contain
1104 an additional "member" field, used by RestoreHelper.
1105 '''
ec57ce53 1106 class TarPathIterator(object):
83a81852 1107 def __init__(self, delta_tar, tar_path, new_volume_handler=None):
24ddf0a2 1108 self.delta_tar = delta_tar
ec57ce53 1109 self.tar_path = tar_path
24ddf0a2 1110 self.tar_obj = None
6bca471c 1111 self.last_member = None
26fdd428 1112 self.new_volume_handler = new_volume_handler
1113 self.__enter__()
1114
1115 def __iter__(self):
1116 return self
1117
1118 def release(self):
1119 if self.tar_obj:
1120 self.tar_obj.close()
1121
1122 def __enter__(self):
1123 '''
1124 Allows this iterator to be used with the "with" statement
1125 '''
1126 if self.tar_obj is None:
1127 decryptor = None
1128 if self.delta_tar.password is not None:
1129 decryptor = crypto.Decrypt \
1130 (password=self.delta_tar.password,
1131 key=self.delta_tar.crypto_key,
1132 strict_ivs=False)
1133 self.tar_obj = tarfile.TarFile.open(self.tar_path,
1134 mode='r' + self.delta_tar.mode,
1135 format=tarfile.GNU_FORMAT,
d1c38f40 1136 concat='#' in self.delta_tar.mode,
d5e1d60f 1137 encryption=decryptor,
83a81852 1138 new_volume_handler=self.new_volume_handler,
1139 save_to_members=False,
1140 dereference=True)
1141 return self
1142
1143 def __exit__(self, type, value, tb):
1144 '''
1145 Allows this iterator to be used with the "with" statement
1146 '''
1147 if self.tar_obj:
1148 self.tar_obj.close()
1149 self.tar_obj = None
1150
be60ffd0 1151 def __next__(self):
1152 '''
1153 Read each member and return it as a stat dict
1154 '''
be60ffd0 1155 tarinfo = self.tar_obj.__iter__().__next__()
1156 # NOTE: here we compare if tarinfo.path is the same as before
1157 # instead of comparing the tarinfo object itself because the
1158 # object itself might change for multivol tarinfos
1159 if tarinfo is None or (self.last_member is not None and\
1160 self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
1161 raise StopIteration
1162
1163 self.last_member = tarinfo
1164
1165 ptype = 'unknown'
1166 if tarinfo.isfile():
1167 ptype = 'file'
1168 elif tarinfo.isdir():
ab7e7465 1169 ptype = 'directory'
1170 elif tarinfo.islnk() or tarinfo.issym():
1171 ptype = 'link'
1172
1173 return {
1174 u'type': ptype,
1175 u'path': tarinfo.path,
1176 u'mode': tarinfo.mode,
1177 u'mtime': tarinfo.mtime,
1178 u'ctime': -1, # cannot restore
1179 u'uid': tarinfo.uid,
1180 u'gid': tarinfo.gid,
1181 u'inode': -1, # cannot restore
1182 u'size': tarinfo.size,
1183 u'member': tarinfo
1184 }, 0
1185
26fdd428 1186 return TarPathIterator(self, tar_path, new_volume_handler)
24ddf0a2 1187
df99a044 1188 def jsonize_path_iterator(self, iter, strip=0):
1189 '''
1190 converts the yielded items of an iterator into json path lines.
1191
1192 strip: Strip the smallest prefix containing num leading slashes from
1193 the file path.
1194 '''
1195 while True:
1196 try:
be60ffd0 1197 path = iter.__next__()
df99a044 1198 if strip == 0:
4ac6d333 1199 yield self._stat_dict(path), 0
1200 else:
1201 st = self._stat_dict(path)
1202 st['path'] = "/".join(path.split("/")[strip:])
4ac6d333 1203 yield st, 0
1204 except StopIteration:
1205 break
1206
1207 def iterate_disaster_index (self, index):
1208 """
1209 Mimic the behavior of the other object iterators, just with the inputs
1210 supplied directly as *index*.
1211 """
1212
1213 class RawIndexIterator(object):
65b35c42 1214 def __init__(self, delta_tar, index):
1215 self.delta_tar = delta_tar
1216 self.index = index
1217 self.__enter__()
1218
1219 def __iter__(self):
1220 return self
1221
1222 def release(self):
65b35c42 1223 pass
1224
1225 def __enter__(self):
1226 '''
1227 Allows this iterator to be used with the "with" statement
1228 '''
1229 self.iter = self.index.__iter__ ()
1230 return self
1231
1232 def __exit__(self, type, value, tb):
1233 '''
1234 Allows this iterator to be used with the "with" statement
1235 '''
1236
1237 def __next__(self):
1238 idxent = self.iter.__next__ ()
65b35c42 1239 return idxent, 0
1240
1241 return RawIndexIterator(self, index)
1242
1243 def collate_iterators(self, it1, it2):
1244 '''
1245 Collate two iterators, so that it returns pairs of the items of each
1246 iterator (if the items are the same), or (None, elem2) or (elem1, None)
1247 when there's no match for the items in the other iterator.
1248
1249 It assumes that the items in both lists are ordered in the same way.
1250 '''
ea6d3c3e 1251 l_no = 0
1252 elem1, elem2 = None, None
1253 while True:
1254 if not elem1:
1255 try:
be60ffd0 1256 elem1, l_no = it1.__next__()
1257 except StopIteration:
1258 if elem2:
ea6d3c3e 1259 yield (None, elem2, l_no)
d07c8065 1260 for elem2 in it2:
1261 if isinstance(elem2, tuple):
1262 elem2 = elem2[0]
1263 yield (None, elem2, l_no)
d07c8065 1264 break
1265 if not elem2:
1266 try:
be60ffd0 1267 elem2 = it2.__next__()
1268 if isinstance(elem2, tuple):
1269 elem2 = elem2[0]
1270 except StopIteration:
1271 if elem1:
ea6d3c3e 1272 yield (elem1, None, l_no)
df99a044 1273 for elem1, l_no in it1:
ea6d3c3e 1274 yield (elem1, None, l_no)
d07c8065 1275 break
1276
1277 index1 = self.unprefixed(elem1['path'])
1278 index2 = self.unprefixed(elem2['path'])
1279 i1, i2 = self.compare_indexes(index1, index2)
1280
1281 yield1 = yield2 = None
1282 if i1 is not None:
1283 yield1 = elem1
1284 elem1 = None
1285 if i2 is not None:
1286 yield2 = elem2
1287 elem2 = None
1288 yield (yield1, yield2, l_no)
1289
1290 def compare_indexes(self, index1, index2):
1291 '''
1292 Compare iterator indexes and return a tuple in the following form:
1293 if index1 < index2, returns (index1, None)
1294 if index1 == index2 returns (index1, index2)
1295 else: returns (None, index2)
1296 '''
1297 l1 = index1.split('/')
1298 l2 = index2.split('/')
1299 length = len(l2) - len(l1)
1300
1301 if length > 0:
1302 return (index1, None)
1303 elif length < 0:
1304 return (None, index2)
1305
1306 for i1, i2 in zip(l1, l2):
1307 if i1 < i2:
1308 return (index1, None)
1309 elif i1 > i2:
1310 return (None, index2)
1311
1312 return (index1, index2)
0708a374 1313
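    # Comparison sketch for compare_indexes() (values invented for
    # illustration): paths are compared component-wise and a shorter path
    # orders before a longer one, which is what lets collate_iterators()
    # advance the iterator holding the smaller path.
    #
    #   compare_indexes('a/b', 'a/b')    # -> ('a/b', 'a/b')
    #   compare_indexes('a/b', 'a/b/c')  # -> ('a/b', None)
    #   compare_indexes('a/c', 'a/b')    # -> (None, 'a/b')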
8c65a2b1 1314 def list_backup(self, backup_tar_path, list_func=None):
be60ffd0 1315 if not isinstance(backup_tar_path, str):
1316 raise Exception('Backup tar path must be a string')
1317
1318 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1319 raise Exception('Source path "%s" does not exist or is not a '\
1320 'file' % backup_tar_path)
1321
1322 if not os.access(backup_tar_path, os.R_OK):
1323 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1324
1325 cwd = os.getcwd()
1326
b7c47f38 1327 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
1328 '''
1329 Handles the new volumes
1330 '''
1331 volume_name = deltarobj.volume_name_func(backup_path, True,
1332 volume_number, guess_name=True)
1333 volume_path = os.path.join(backup_path, volume_name)
1334
1335 # we convert relative paths into absolute because CWD is changed
1336 if not os.path.isabs(volume_path):
1337 volume_path = os.path.join(cwd, volume_path)
1338 tarobj.open_volume(volume_path, encryption=encryption)
1339
774ca538 1340 if self.decryptor is None:
1341 self.decryptor = \
1342 self.initialize_encryption (CRYPTO_MODE_DECRYPT,
1343 strict_validation=False)
1344
1345 backup_path = os.path.dirname(backup_tar_path)
1346 if not os.path.isabs(backup_path):
1347 backup_path = os.path.join(cwd, backup_path)
133d30da 1348 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)
b7a6566b 1349
1350 tarobj = tarfile.TarFile.open(backup_tar_path,
1351 mode='r' + self.mode,
1352 format=tarfile.GNU_FORMAT,
d1c38f40 1353 concat='#' in self.mode,
133d30da 1354 encryption=self.decryptor,
ea625b04 1355 new_volume_handler=new_volume_handler,
1356 save_to_members=False,
1357 dereference=True)
1358
1359 def filter(cls, list_func, tarinfo):
1360 if list_func is None:
b008f989 1361 self.logger.info(tarinfo.path)
1362 else:
1363 list_func(tarinfo)
1364 return False
1365 filter = partial(filter, self, list_func)
1366
c650acfa 1367 tarobj.extractall(filter=filter, unlink=True)
1368 tarobj.close()
1369
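    # Listing sketch for list_backup() (hypothetical tar path): by default
    # every member path is logged via the configured logger; a custom
    # list_func receives the tarinfo objects instead.
    #
    #   members = []
    #   dtar.list_backup('backup/bfull-....tar.gz.pdtcrypt',
    #                    list_func=members.append)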
0708a374 1370 def restore_backup(self, target_path, backup_indexes_paths=[],
e93f83f1 1371 backup_tar_path=None, restore_callback=None,
1372 disaster=tarfile.TOLERANCE_STRICT, backup_index=None,
1373 strict_validation=True):
1374 '''
1375 Restores a backup.
1376
1377 Parameters:
1378 - target_path: path to restore.
1379 - backup_indexes_paths: path to backup indexes, in descending date order.
1380 The indexes indicate the location of their respective backup volumes,
1381 and multiple indexes are needed to be able to restore diff backups.
1382 Note that this is an optional parameter: if not supplied, it will
1383 try to restore directly from backup_tar_path.
1384 - backup_tar_path: path to the backup tar file. Used as an alternative
1385 to backup_indexes_paths to restore directly from a tar file without
1386 using any file index. If it's a multivol tarfile, volume_name_func
1387 will be called.
4da27cfe 1388 - restore_callback: callback function to be called during restore.
b0aef801 1389 This is passed to the helper and gets called for every file.
11684b1d 1390
3a7e1a50 1391 NOTE: If you want to use an index to restore a backup, this function
1392 only supports doing so when the tarfile mode is either uncompressed or
1393 uses concat compression mode, because otherwise it would be very slow.
1394
1395 NOTE: Indices are assumed to follow the same format as the index_mode
1396 specified in the constructor.
1397
1398 Returns the list of files that could not be restored, if there were
1399 any.
0708a374 1400 '''
11684b1d 1401 # check/sanitize input
be60ffd0 1402 if not isinstance(target_path, str):
1403 raise Exception('Target path must be a string')
1404
1405 if backup_indexes_paths is None and backup_tar_path == []:
1406 raise Exception("You have to either provide index paths or a tar path")
e5c6ca04 1407
1408 if isinstance (backup_index, list) is True:
1409 mode = "disaster"
1410 elif len(backup_indexes_paths) == 0:
1411 mode = "tar"
1412 else:
1413 mode = "diff"
1414
1415 if mode == "tar":
be60ffd0 1416 if not isinstance(backup_tar_path, str):
1417 raise Exception('Backup tar path must be a string')
1418
1419 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1420 raise Exception('Source path "%s" does not exist or is not a '\
1421 'file' % backup_tar_path)
1422
1423 if not os.access(backup_tar_path, os.R_OK):
1424 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1425 else:
1426 if not isinstance(backup_indexes_paths, list):
1427 raise Exception('backup_indexes_paths must be a list')
1428
1429 if self.mode.startswith(':') or self.mode.startswith('|'):
1430 raise Exception('Restore only supports either uncompressed tars'
1431 ' or concat compression when restoring from an index, and '
1432 ' the open mode you provided is "%s"' % self.mode)
1433
1434 for index in backup_indexes_paths:
be60ffd0 1435 if not isinstance(index, str):
11684b1d 1436 raise Exception('indices must be strings')
e5c6ca04 1437
1438 if not os.path.exists(index) or not os.path.isfile(index):
1439 raise Exception('Index path "%s" does not exist or is not a '\
1440 'file' % index)
1441
1442 if not os.access(index, os.R_OK):
1443 raise Exception('Index path "%s" is not readable' % index)
1444
1445 # try to create backup path if needed
37ab0f57 1446 os.makedirs(target_path, exist_ok=True)
e5c6ca04 1447
1448 # make backup_tar_path absolute so that iterate_tar_path works fine
1449 if backup_tar_path and not os.path.isabs(backup_tar_path):
1450 backup_tar_path = os.path.abspath(backup_tar_path)
1451
d5361dac 1452 cwd = os.getcwd()
ec57ce53 1453 os.chdir(target_path)
d5361dac 1454
2ae46844 1455 # setup for decrypting payload
774ca538 1456 if self.decryptor is None:
1457 self.decryptor = \
1458 self.initialize_encryption (CRYPTO_MODE_DECRYPT,
1459 strict_validation=strict_validation)
2ae46844 1460
ea6d3c3e 1461 if mode == 'tar':
1462 index_it = self.iterate_tar_path(backup_tar_path)
1463 helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
ec57ce53 1464 tarobj=index_it.tar_obj)
ea6d3c3e 1465 elif mode == "diff":
1466 helper = RestoreHelper(self, cwd, backup_indexes_paths,
1467 disaster=disaster)
1468 try:
1469 # get iterator from newest index at _data[0]
1470 index1 = helper._data[0]["path"]
1471 index_it = \
1472 self.iterate_index_path(index1,
1473 strict_validation=strict_validation)
1474 except tarfile.DecryptionError as exn:
1475 self.logger.error("failed to decrypt file [%s]: %s; is this an "
1476 "actual encrypted index file?"
1477 % (index1, str (exn)))
1478 return [(index1, exn)]
1479 except Exception as exn:
1480 # compressed files
1481 self.logger.error("failed to read file [%s]: %s; is this an "
1482 "actual index file?" % (index1, str (exn)))
f3d10816 1483 return [(index1, exn)]
1484 elif mode == "disaster":
1485 index_it = self.iterate_disaster_index (backup_index)
1486 helper = RestoreHelper (self, cwd, backup_path=backup_tar_path,
1487 backup_index=backup_index,
1488 disaster=disaster)
b84beea7 1489
b750b280 1490 index_decryptor = helper._data[0]["decryptor"]
d07c8065 1491
1492 dir_it = self._recursive_walk_dir('.')
1493 dir_path_it = self.jsonize_path_iterator(dir_it)
11684b1d 1494
1495 failed = [] # irrecoverable files
1496
a395759e 1497 # for each file to be restored, do:
1498 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
1499 if not ipath:
1500 upath = dpath['path']
1501 op_type = dpath['type']
1502 else:
1503 upath = self.unprefixed(ipath['path'])
1504 op_type = ipath['type']
42c04ead 1505
24ddf0a2 1506 # filter paths
75059f3c 1507 if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
24ddf0a2 1508 continue
ea6d3c3e 1509
1510 # if types of the file mismatch, the file needs to be deleted
1511 # and re-restored
1512 if ipath is not None and dpath is not None and\
1513 dpath['type'] != ipath['type']:
1514 helper.delete(upath)
1515
1516 # if file not found in dpath, we can directly restore from index
1517 if not dpath:
1518 # if the file doesn't exist and it needs to be deleted, it
1519 # means that work is already done
1520 if ipath['path'].startswith('delete://'):
ea6d3c3e 1521 continue
24ddf0a2 1522 try:
b008f989 1523 self.logger.debug("restore %s" % ipath['path'])
4da27cfe 1524 helper.restore(ipath, l_no, restore_callback)
be60ffd0 1525 except Exception as e:
e93f83f1 1526 iipath = ipath.get ("path", "")
7b07645e 1527 self.logger.error("FAILED to restore: {} ({})"
e93f83f1 1528 .format(iipath, e))
04f4c7ab 1529 if disaster != tarfile.TOLERANCE_STRICT:
e93f83f1 1530 failed.append ((iipath, e))
24ddf0a2 1531 continue
11684b1d 1532
1533 # if both files are equal, we have nothing to restore
1534 if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
1535 continue
1536
1537 # we have to restore the file, but first we need to delete the
1538 # currently existing file.
1539 # we don't delete it if it is a directory, because it might just
1540 # have a changed mtime, and removing and recreating the whole tree
1541 # would be needlessly expensive
1542 if ipath:
1543 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
42c04ead 1544 helper.delete(upath)
b008f989 1545 self.logger.debug("restore %s" % ipath['path'])
1546 try:
1547 helper.restore(ipath, l_no, restore_callback)
1548 except Exception as e:
04f4c7ab 1549 if disaster == tarfile.TOLERANCE_STRICT:
1550 raise
1551 failed.append ((ipath.get ("path", ""), e))
1552 continue
1553
1554 # if the file is not in the index (so it comes from the target
1555 # directory) then we have to delete it
1556 else:
c9d47a03 1557 self.logger.debug("delete %s" % upath)
24ddf0a2 1558 helper.delete(upath)
42c04ead 1559
1560 helper.restore_directories_permissions()
1561 index_it.release()
1562 os.chdir(cwd)
1563 helper.cleanup()
ea6d3c3e 1564
1565 return failed
1566
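# Illustrative usage sketch; the DeltaTar instance and the paths below are
# hypothetical, but the call mirrors how recover_backup() invokes
# restore_backup() further down:
#
#     dtar = DeltaTar(...)   # configured elsewhere: mode, password/key, logger
#     failures = dtar.restore_backup("/srv/restore",
#                                    backup_indexes_paths=["/backups/diff/index.gz",
#                                                          "/backups/full/index.gz"])
#     for path, exc in failures:
#         print("could not restore", path, exc)
#
# The return value is the ``failed`` list built above: one (path, exception)
# tuple per file that could not be restored under the selected tolerance.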
1567
1568 def recover_backup(self, target_path, backup_indexes_paths=[],
1569 restore_callback=None):
1570 """
1571 Walk the index, extracting objects in disaster mode. Bad files are
1572 reported along with a reason.
1573 """
1574 return self.restore_backup(target_path,
1575 backup_indexes_paths=backup_indexes_paths,
1576 disaster=tarfile.TOLERANCE_RECOVER,
1577 strict_validation=False)
1578
1579
6690f5e0 1580 def rescue_backup(self, target_path, backup_tar_path,
1581 restore_callback=None):
1582 """
1583 More aggressive “unfsck” mode: do not rely on the index data as the
1584 files may be corrupt; skim files for header-like information and
1585 attempt to retrieve the data.
1586 """
1587 def gen_volume_name (nvol):
1588 return os.path.join (os.path.dirname (backup_tar_path),
1589 self.volume_name_func (backup_tar_path,
1590 True,
1591 nvol))
1592
1593 backup_index = tarfile.gen_rescue_index (gen_volume_name,
1594 self.mode,
1595 password=self.password,
1596 key=self.crypto_key)
6690f5e0 1597
04f4c7ab 1598 return self.restore_backup(target_path,
b84beea7 1599 backup_index=backup_index,
65b35c42 1600 backup_tar_path=backup_tar_path,
1601 disaster=tarfile.TOLERANCE_RESCUE,
1602 strict_validation=False)
1603
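# Illustrative comparison of the two degraded-mode entry points above; the
# instance and paths are hypothetical:
#
#     # recover_backup(): trust the index, tolerate damaged archive members
#     failed = dtar.recover_backup("/srv/restore",
#                                  backup_indexes_paths=["/backups/index.gz"])
#
#     # rescue_backup(): distrust the index and rebuild one by scanning the
#     # volumes themselves (tarfile.gen_rescue_index above)
#     failed = dtar.rescue_backup("/srv/restore",
#                                 "/backups/backup-full.tar.gz")
#
# Both disable strict IV validation and run with TOLERANCE_RECOVER and
# TOLERANCE_RESCUE respectively; both return the same (path, exception)
# failure list as restore_backup().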
1604
1605 def _parse_json_line(self, f, l_no):
1606 '''
ee0e095f 1607 Read a line from a file-like object and parse it as JSON.
1608 '''
1609 l = f.readline()
1610 l_no += 1
1611 try:
be60ffd0 1612 j = json.loads(l.decode('UTF-8'))
1613 except UnicodeDecodeError as e:
1614 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1615 raise Exception \
1616 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1617 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1618 from e
1619 raise Exception \
1620 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1621 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1622 from e
be60ffd0 1623 except ValueError as e:
1624 raise Exception("error parsing this json line "
1625 "(line number %d): %s" % (l_no, l))
1626 return j, l_no
ea6d3c3e 1627
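# For orientation: the index consumed by _parse_json_line() above contains
# one JSON object per line. The keys shown are the ones accessed by the
# restore code in this file; the values are invented for illustration:
#
#     {"type": "file", "path": "snapshot://./etc/hosts",
#      "volume": 0, "offset": 10240}
#
# The helper returns the decoded object together with the incremented line
# counter so that callers can report accurate line numbers on errors.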
24ddf0a2 1628
1629class RestoreHelper(object):
1630 '''
1631 Class used to help restore files from indices
1632 '''
1633
1634 # holds the dicts of data
1635 _data = []
1636
1637 _deltatar = None
1638
1639 _cwd = None
1640
1641 # list of directories to be restored. This is done as a last step, see
1642 # tarfile.extractall for details.
1643 _directories = []
1644
04f4c7ab 1645 _disaster = tarfile.TOLERANCE_STRICT
e93f83f1 1646
037994ca 1647 def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
1648 backup_index=None, tarobj=None,
1649 disaster=tarfile.TOLERANCE_STRICT):
1650 '''
1651 Constructor opens the tars and initializes the data structures.
1652
1653 Assumptions:
1654
1655 - Index list must be provided in reverse order (newer first).
1656 - “newer first” apparently means that if there are n backups
1657 provided, the last full backup is at index n-1 and the most recent
1658 diff backup is at index 0.
1659 - Only the first, the second, and the last elements of
1660 ``index_list`` are relevant, others will not be accessed.
1661 - If no ``index_list`` is provided, both ``tarobj`` and
1662 ``backup_path`` must be passed.
1663 - If ``index_list`` is provided, the values of ``tarobj`` and
1664 ``backup_path`` are ignored.
1665 '''
1666 self._data = []
0501fe0a 1667 self._directories = []
1668 self._deltatar = deltatar
1669 self._cwd = cwd
3031b7ae 1670 self._password = deltatar.password
1f3fd7b0 1671 self._crypto_key = deltatar.crypto_key
3031b7ae 1672 self._decryptors = []
e93f83f1 1673 self._disaster = disaster
ea6d3c3e 1674
1675 # Disable strict checking for linearly increasing IVs when running
1676 # in rescue or recover mode.
1677 strict_validation = disaster == tarfile.TOLERANCE_STRICT
1678
1679 try:
1680 import grp, pwd
1681 except ImportError:
1682 grp = pwd = None
1683
1684 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1685 self.canchown = True
1686 else:
1687 self.canchown = False
1688
65b35c42 1689 if isinstance (backup_index, list):
001bd488 1690 decryptor = self._deltatar.decryptor
1691 self._data = \
1692 [{ "curr_vol_no" : None
1693 , "vol_fd" : None
1694 , "offset" : -1
1695 , "tarobj" : None
1696 , "path" : backup_path
1697 , "is_full" : True
1698 , "iterator" : None
1699 , "last_itelement" : None
1700 , "last_lno" : 0
1701 , "new_volume_handler" :
1702 partial(self.new_volume_handler,
1703 self._deltatar, self._cwd, True,
1704 os.path.dirname(backup_path), decryptor)
1705 , "decryptor" : decryptor
1706 }]
1707 elif index_list is not None:
24ddf0a2 1708 for index in index_list:
037994ca 1709 is_full = index == index_list[-1]
24ddf0a2 1710
d5e1d60f 1711 decryptor = None
3031b7ae 1712 if self._password is not None:
1f3fd7b0 1713 decryptor = crypto.Decrypt (password=self._password,
1714 key=self._crypto_key,
1715 strict_ivs=strict_validation)
d5e1d60f 1716
1717 # make paths absolute to avoid cwd problems
1718 if not os.path.isabs(index):
1719 index = os.path.normpath(os.path.join(cwd, index))
1720
1721 s = dict(
1722 curr_vol_no = None,
1723 vol_fd = None,
1724 offset = -1,
1725 tarobj = None,
1726 path = index,
1727 is_full = is_full,
1728 iterator = None,
1729 last_itelement = None,
1730 last_lno = 0,
1731 new_volume_handler = partial(self.new_volume_handler,
1732 self._deltatar, self._cwd, is_full,
1733 os.path.dirname(index), decryptor),
1734 decryptor = decryptor
1735 )
1736 self._data.append(s)
1737 else:
ea6d3c3e 1738 # make paths absolute to avoid cwd problems
1739 if not os.path.isabs(backup_path):
1740 backup_path = os.path.normpath(os.path.join(cwd, backup_path))
ea6d3c3e 1741
1742 # update the new_volume_handler of tar_obj
1743 tarobj.new_volume_handler = partial(self.new_volume_handler,
b7c47f38 1744 self._deltatar, self._cwd, True, os.path.dirname(backup_path),
133d30da 1745 self._deltatar.decryptor)
1746 s = dict(
1747 curr_vol_no = None,
1748 vol_fd = None,
1749 offset = -1,
1750 tarobj = tarobj,
1751 path = backup_path,
1752 is_full = True,
1753 iterator = None,
1754 last_itelement = None,
1755 last_lno = 0,
1756 new_volume_handler = tarobj.new_volume_handler,
1757 decryptor = self._deltatar.decryptor
1758 )
1759 self._data.append(s)
1760
3031b7ae 1761
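# Construction sketch mirroring how restore_backup() instantiates this class
# via __init__ above; the deltatar instance and the paths are placeholders:
#
#     # diff restore: newest index first, the full backup index last
#     helper = RestoreHelper(deltatar, os.getcwd(),
#                            index_list=["/backups/diff/index.gz",
#                                        "/backups/full/index.gz"])
#
#     # plain tar restore: no index list; pass the open tarobj and its path
#     helper = RestoreHelper(deltatar, os.getcwd(),
#                            backup_path="/backups/backup-full.tar.gz",
#                            tarobj=open_tarfile)
#
# Each self._data entry built above carries the per-index state: open volume
# fd, current tarobj, decryptor and the cached iterator position.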
1762 def cleanup(self):
1763 '''
1764 Closes all open files
1765 '''
1766 for data in self._data:
1767 if data['vol_fd']:
1768 data['vol_fd'].close()
1769 data['vol_fd'] = None
1770 if data['tarobj']:
1771 data['tarobj'].close()
1772 data['tarobj'] = None
1773
1774 def delete(self, path):
1775 '''
1776 Delete a file
1777 '''
1778 if not os.path.exists(path):
1779 return
1780
24ddf0a2 1781 # to preserve parent directory mtime, we save it
283fbd5e 1782 parent_dir = os.path.dirname(path) or os.getcwd()
1783 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1784
561bc39f 1785 if os.path.isdir(path) and not os.path.islink(path):
1786 shutil.rmtree(path)
1787 else:
1788 os.unlink(path)
1789
1790 # now we restore parent_directory mtime
1791 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1792
4da27cfe 1793 def restore(self, itpath, l_no, callback=None):
ea6d3c3e 1794 '''
8a54d5dd 1795 Restore the path from the appropriate backup. Receives the current path
e8d95fe5 1796 from the newest (= first) index iterator. itpath must not be None.
b0aef801 1797 callback is a custom function that gets called for every file.
1798
1799 NB: This function takes the attribute ``_data`` as input but will only
1800 ever use its first and, if available, second element. Anything else in
1801 ``._data[]`` will be ignored.
ea6d3c3e 1802 '''
1803 path = itpath['path']
1804
1805 # Calls the callback function
1806 if callback:
1807 callback()
1808
ea6d3c3e 1809 if path.startswith('delete://'):
1810 # the file has already been deleted in restore_backup in all cases,
1811 # so there is nothing left to do here
ea6d3c3e 1812 return
df86af81 1813
e8d95fe5 1814 # get data from newest index (_data[0])
1815 data = self._data[0]
1816 upath = self._deltatar.unprefixed(path)
1817
24ddf0a2 1818 # to preserve parent directory mtime, we save it
283fbd5e 1819 parent_dir = os.path.dirname(upath) or os.getcwd()
37ab0f57 1820 os.makedirs(parent_dir, exist_ok=True)
1821 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1822
e8d95fe5 1823 # if path is found in the newest index as to be snapshotted, deal with it
1824 # and finish
1825 if path.startswith('snapshot://'):
65b35c42 1826 self.restore_file(itpath, data, path, l_no, upath)
1827
1828 # now we restore parent_directory mtime
1829 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1830 return
1831
1832 # we go from index to index, looking the path up in each, until we find
1833 # the index holding the most recent snapshot of the file being restored
1834 #
1835 # Right now we support diff backups only, no incremental backups.
1836 # As a result _data[0] is always the diff backup index
1837 # and _data[1] the full backup index.
527670c4 1838 if len(self._data) == 2:
7273719c 1839 data = self._data[1]
1840 d, l_no, dpath = self.find_path_in_index(data, upath)
1841 if not d:
1842 self._deltatar.logger.warning('Error restoring file %s from '
1843 'index, not found in index %s' % (path, data['path']))
1844 return
1845
1846 cur_path = d.get('path', '')
1847 if cur_path.startswith('delete://'):
1848 self._deltatar.logger.warning(('Strange thing happened, file '
1849 '%s was listed in first index but deleted by another '
1850 'one. Path was ignored and untouched.') % path)
1851 return
1852 elif cur_path.startswith('snapshot://'):
1853 # this code path is reached when the file is unchanged
1854 # in the newest index and therefore of type 'list://'
1855 self.restore_file(d, data, path, l_no, dpath)
1856
1857 # now we restore parent_directory mtime
1858 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1859 return
1860
1861 # error code path is reached when:
1862 # a) we have more than two indexes (unsupported atm)
1863 # b) both indexes contain a list:// entry (logic error)
1864 # c) we have just one index and it also contains list://
4bda6f45 1865 self._deltatar.logger.warning(('Error restoring file %s from index, '
1866 'snapshot not found in any index') % path)
1867
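# Summary of the path prefixes that drive the branches in restore() above;
# the concrete path is an invented example:
#
#     'snapshot://./etc/hosts'  ->  the content is stored in this backup
#     'list://./etc/hosts'      ->  unchanged, look it up in an older index
#     'delete://./etc/hosts'    ->  removed since the previous backup
#
# self._deltatar.unprefixed() strips the scheme, yielding the on-disk path
# ('./etc/hosts') that is actually created or deleted.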
1868 def find_path_in_index(self, data, upath):
1869 # NOTE: the iterator may have to be restarted because it can be walked
1870 # over completely multiple times, for example if one path is not found
1871 # in one index and we have to go on to the next index.
1872 it = data['iterator']
1873 if it is None:
670f9934 1874 it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
be60ffd0 1875 d, l_no = it.__next__()
670f9934 1876 else:
1877 d = data['last_itelement']
1878 l_no = data['last_lno']
1879
670f9934 1880 while True:
7273719c 1881 dpath = self._deltatar.unprefixed(d.get('path', ''))
1882 if upath == dpath:
1883 data['last_itelement'] = d
1884 data['last_lno'] = l_no
1885 return d, l_no, dpath
1886
1887 up, dp = self._deltatar.compare_indexes(upath, dpath)
1888 # any time upath should have appeared before current dpath, it means
1889 # upath is just not in this index and we should stop
1890 if dp is None:
1891 data['last_itelement'] = d
1892 data['last_lno'] = l_no
1893 return None, 0, ''
1894
1895 try:
be60ffd0 1896 d, l_no = it.__next__()
1897 except StopIteration:
1898 data['last_itelement'] = d
1899 data['last_lno'] = l_no
1900 return None, 0, ''
670f9934 1901
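# The lookup in find_path_in_index() above is effectively a resumable merge
# join: index entries and the requested paths arrive in the same order, so
# the iterator position is cached in 'last_itelement'/'last_lno' between
# calls. Condensed, simplified sketch of the loop:
#
#     while True:
#         if upath equals the current entry's path:  return the entry
#         if upath should already have appeared:     return None  # not here
#         advance the index iterator                 # else keep scanning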
1902 def restore_directories_permissions(self):
1903 '''
1904 Restore directory permissions once everything else has been restored
1905 '''
1906 try:
1907 import grp, pwd
1908 except ImportError:
1909 grp = pwd = None
1910
1911 self._directories.sort(key=operator.attrgetter('name'))
1912 self._directories.reverse()
1913
1914 # Set correct owner, mtime and filemode on directories.
1915 for member in self._directories:
1916 dirpath = member.name
1917 try:
1918 os.chmod(dirpath, member.mode)
1919 os.utime(dirpath, (member.mtime, member.mtime))
253d4cdd 1920 if self.canchown:
1921 # We have to be root to do so.
1922 try:
1923 g = grp.getgrnam(member.gname)[2]
1924 except KeyError:
1925 g = member.gid
1926 try:
1927 u = pwd.getpwnam(member.uname)[2]
1928 except KeyError:
1929 u = member.uid
1930 try:
4e433e00 1931 if member.issym and hasattr(os, "lchown"):
1932 os.lchown(dirpath, u, g)
1933 else:
1934 os.chown(dirpath, u, g)
1935 except EnvironmentError:
1936 raise tarfile.ExtractError("could not change owner")
1937
be60ffd0 1938 except tarfile.ExtractError as e:
4bda6f45 1939 self._deltatar.logger.warning('tarfile: %s' % e)
0501fe0a 1940
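# Note on the ordering used by restore_directories_permissions() above:
# sorting by name and reversing handles children before their parents, so a
# parent's final (possibly restrictive) mode and mtime are applied only after
# everything beneath it has been processed, e.g.:
#
#     collected: ['a', 'a/b', 'a/b/c']
#     processed: ['a/b/c', 'a/b', 'a']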
df86af81 1941 @staticmethod
b750b280 1942 def new_volume_handler(deltarobj, cwd, is_full, backup_path, decryptor, tarobj, base_name, volume_number):
ea6d3c3e 1943 '''
1944 Set up a new volume and perform the tasks necessary for transitioning
1945 to the next one.
ea6d3c3e 1946 '''
1947 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1948 volume_number, guess_name=True)
1949 volume_path = os.path.join(backup_path, volume_name)
1950
1951 # we convert relative paths into absolute because CWD is changed
1952 if not os.path.isabs(volume_path):
1953 volume_path = os.path.join(cwd, volume_path)
1954
1955 tarobj.open_volume(volume_path, encryption=decryptor)
ea6d3c3e 1956
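# How the handler above gets wired up: the partial() calls in
# RestoreHelper.__init__ bind the first five arguments in advance, and the
# tarfile machinery supplies the remaining three when it reaches a volume
# boundary. Sketch:
#
#     handler = partial(RestoreHelper.new_volume_handler,
#                       deltatar, cwd, is_full, backup_dir, decryptor)
#     ...
#     handler(tarobj, base_name, volume_number)   # invoked by tarfile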
253d4cdd 1957 def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
1958 '''
1959 Restores a snapshot of a file from a specific backup
1960 '''
ea6d3c3e 1961 op_type = file_data.get('type', -1)
24ddf0a2 1962 member = file_data.get('member', None)
9f9ae874 1963 ismember = bool(member)
1964
1965 # when member is set, we can assume everything is in order and we
1966 # just have to restore the path
a2a37de7 1967 if member is None:
1968 vol_no = file_data.get('volume', -1)
1969 # sanity check
1970 if not isinstance(vol_no, int) or vol_no < 0:
4bda6f45 1971 self._deltatar.logger.warning('unrecognized type to be restored: '
1972 '%s, line %d' % (op_type, l_no))
1973
1974 # set up the volume that needs to be read; only needed when member
1975 # is not set
a2a37de7 1976 if index_data['curr_vol_no'] != vol_no:
1977 index_data['curr_vol_no'] = vol_no
1978 backup_path = os.path.dirname(index_data['path'])
1979 vol_name = self._deltatar.volume_name_func(backup_path,
1980 index_data['is_full'], vol_no, guess_name=True)
1981 vol_path = os.path.join(backup_path, vol_name)
1982 if index_data['vol_fd']:
1983 index_data['vol_fd'].close()
be60ffd0 1984 index_data['vol_fd'] = open(vol_path, 'rb')
1985
1986 # force reopen of the tarobj because of new volume
1987 if index_data['tarobj']:
1988 index_data['tarobj'].close()
1989 index_data['tarobj'] = None
1990
1991 # seek tarfile if needed
1992 offset = file_data.get('offset', -1)
ea6d3c3e 1993 if index_data['tarobj']:
c52fd26b 1994 if self._disaster == tarfile.TOLERANCE_RESCUE:
1995 # force a seek and reopen
1996 index_data['tarobj'].close()
1997 index_data['tarobj'] = None
1998 else:
1999 try:
2000 member = index_data['tarobj'].__iter__().__next__()
2001 except tarfile.DecryptionError:
2002 pass
2003 except tarfile.CompressionError:
2004 pass
2005
2006 if not member or member.path != file_data['path']:
2007 # force a seek and reopen
2008 index_data['tarobj'].close()
2009 index_data['tarobj'] = None
2010
2011
2012 # open the tarfile if needed
2013 if not index_data['tarobj']:
2014 index_data['vol_fd'].seek(offset)
2015 index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
2016 fileobj=index_data['vol_fd'],
2017 format=tarfile.GNU_FORMAT,
d1c38f40 2018 concat='#' in self._deltatar.mode,
d5e1d60f 2019 encryption=index_data["decryptor"],
253d4cdd 2020 new_volume_handler=index_data['new_volume_handler'],
044585c6 2021 save_to_members=False,
04f4c7ab 2022 tolerance=self._disaster)
24ddf0a2 2023
be60ffd0 2024 member = index_data['tarobj'].__iter__().__next__()
ea6d3c3e 2025
2026 member.path = unprefixed_path
2027 member.name = unprefixed_path
2028
2029 if op_type == 'directory':
253d4cdd 2030 self.add_member_dir(member)
0501fe0a 2031 member = copy.copy(member)
be60ffd0 2032 member.mode = 0o0700
0501fe0a 2033
2034 # if it's an existing directory, we don't need to recreate it; just
2035 # set the right permissions, mtime and the like
2036 if os.path.exists(member.path):
2037 return
2038
9f9ae874 2039 if not ismember:
2040 # set current volume number in tarobj, otherwise the extraction of the
2041 # file might fail when trying to extract a multivolume member
2042 index_data['tarobj'].volume_number = index_data['curr_vol_no']
86a6e741 2043
2044 def ignore_symlink (member, *_args):
2045 self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
786addd6 2046
ea6d3c3e 2047 # finally, restore the file
2048 index_data['tarobj'].extract(member, symlink_cb=ignore_symlink,
2049 unlink=True)
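# Volume access pattern used above, in brief: every index entry records the
# volume number and the byte offset of its tar header, so restore_file() can
# seek the raw volume file and reopen a tar stream right at the member
# instead of scanning from the start. Condensed sketch (simplified; the real
# code also handles decryption, multivolume members and the tolerance modes):
#
#     vol_fd = open(vol_path, 'rb')
#     vol_fd.seek(entry['offset'])
#     tar = tarfile.open(mode="r" + mode, fileobj=vol_fd, ...)
#     member = next(iter(tar))
#     tar.extract(member)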
2050
2051 def add_member_dir(self, member):
2052 '''
2053 Add member dir to be restored at the end
2054 '''
4e433e00 2055 if not self.canchown:
2056 self._directories.append(DirItem(name=member.name, mode=member.mode,
2057 mtime=member.mtime))
2058 else:
2059 self._directories.append(DirItem(name=member.name, mode=member.mode,
2060 mtime=member.mtime, gname=member.gname, uname=member.uname,
4e433e00 2061 uid=member.uid, gid=member.gid, issym=member.issym()))
2062
2063class DirItem(object):
2064 def __init__(self, **kwargs):
be60ffd0 2065 for k, v in kwargs.items():
9f9ae874 2066 setattr(self, k, v)
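# DirItem is a plain attribute bag used by add_member_dir() above; a minimal
# illustration with invented values:
#
#     d = DirItem(name='etc/ssl', mode=0o755, mtime=1388530800)
#     d.name, d.mode, d.mtime   ->   'etc/ssl', 0o755, 1388530800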