fix incorrect error handling in deltatar
[python-delta-tar] / deltatar / deltatar.py
6b2fa38f 1#!/usr/bin/env python3
0708a374 2
51797cd6 3# Copyright (C) 2013, 2014 Intra2net AG
0708a374
ERE
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; either version 3 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU Lesser General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see
17# <http://www.gnu.org/licenses/lgpl-3.0.html>
18
938c2d54
PG
19DELTATAR_HEADER_VERSION = 1
20DELTATAR_PARAMETER_VERSION = 1
3fdea6d4 21
0708a374
ERE
22import logging
23import datetime
6c678f3a 24import binascii
938c2d54 25import io
0501fe0a 26import operator
0708a374 27import os
0501fe0a 28import copy
82de3376 29import shutil
8a8fadda 30import re
e82f14f5
ERE
31import stat
32import json
0708a374
ERE
33from functools import partial
34
35from . import tarfile
2ae46844 36from . import crypto
0708a374 37
0708a374
ERE
38class NullHandler(logging.Handler):
39 def emit(self, record):
40 pass
24ddf0a2
ERE
41
42
0708a374
ERE
43logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
44
974408b5
ERE
45
46# match mode
47NO_MATCH = False
48MATCH = True
49PARENT_MATCH = 2
50
133d30da
PG
51# encryption direction
52CRYPTO_MODE_ENCRYPT = 0
53CRYPTO_MODE_DECRYPT = 1
54
13cc7dfc
PG
55# The canonical extension for encrypted backup files regardless of the actual
56# encryption parameters is “.pdtcrypt”. This is analogous to the encryption
57# header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
58# Since the introduction of the versioned header there is no longer any need
59# for encoding encryption parameters in the file extensions (“.aes128” and
60# suchlike).
61PDTCRYPT_EXTENSION = "pdtcrypt"
2cdd9faf
PG
62PDT_TYPE_ARCHIVE = 0
63PDT_TYPE_AUX = 1
13cc7dfc 64
9eccb1c2
PG
65AUXILIARY_FILE_INDEX = 0
66AUXILIARY_FILE_INFO = 1
67
0708a374
ERE
68class DeltaTar(object):
69 '''
70 Backup class used to create backups
71 '''
72
73 # list of files to exclude in the backup creation or restore operation. It
74 # can contain python regular expressions.
75 excluded_files = []
76
77 # list of files to include in the backup creation or restore operation. It
78 # can contain python regular expressions. If empty, all files in the source
79 # path will be backed up (when creating a backup) or all the files in the
a83fa4ed 80 # backup will be restored (when restoring a backup), but if included_files
0708a374
ERE
81 # is set then only the files included in the list will be processed.
82 included_files = []
83
84 # custom filter of files to be backed up (or restored). Unused and unset
85 # by default. The function receives a file path and must return a boolean.
86 filter_func = None
87
da26094a
ERE
88 # mode in which the delta will be created (when creating a backup) or
89 # opened (when restoring). Accepts the same modes as the tarfile library.
90 mode = ""
0708a374
ERE
91
92 # used together with aes modes to encrypt and decrypt backups.
93 password = None
1f3fd7b0
PG
94 crypto_key = None
95 nacl = None
0708a374 96
dbee011c
PG
97 # parameter version to use when encrypting; note that this has no effect
98 # on decryption since the required settings are determined from the headers
54f909ca 99 crypto_version = DELTATAR_HEADER_VERSION
dbee011c
PG
100 crypto_paramversion = None
101
133d30da 102 # when encrypting or decrypting, these hold crypto handlers; created before
2ae46844 103 # establishing the Tarfile stream iff a password or crypto key is supplied.
133d30da
PG
104 encryptor = None
105 decryptor = None
2ae46844 106
0708a374
ERE
107 # python logger object.
108 logger = None
109
3a7e1a50
ERE
110 # specifies the index mode in the same format as @param mode, but without
111 # the ':', '|' or '#' at the beginning. It doesn't make sense to specify
2ae46844 112 # that the index is encrypted if no password is given in the constructor.
3a7e1a50 113 index_mode = None
0708a374
ERE
114
115 # current time for this backup. Used for file names and file creation checks
116 current_time = None
117
9eae9a1f
ERE
118 # extra data to be included in the header of the index file when creating a
119 # backup
120 extra_data = dict()
121
0708a374
ERE
122 # valid tarfile modes and their corresponding default file extension
123 __file_extensions_dict = {
da26094a
ERE
124 '': '',
125 ':': '',
126 ':gz': '.gz',
127 ':bz2': '.bz2',
128 '|': '',
129 '|gz': '.gz',
130 '|bz2': '.bz2',
131 '#gz': '.gz',
6e99d23a
PG
132 '#gz.pdtcrypt': '.gz',
133 '#pdtcrypt': '',
d1c38f40 134 '#': '',
0708a374
ERE
135 }
136
3a7e1a50
ERE
137 # valid index modes and their corresponding default file extension
138 __index_extensions_dict = {
139 '': '',
140 'gz': '.gz',
141 'bz2': '.bz2',
6e99d23a
PG
142 'gz.pdtcrypt': '.gz',
143 'pdtcrypt': '',
3a7e1a50
ERE
144 }
145
8adbe50d
ERE
146 # valid path prefixes
147 __path_prefix_list = [
148 u'snapshot://',
149 u'list://',
150 u'delete://'
151 ]
152
0708a374 153 def __init__(self, excluded_files=[], included_files=[],
da26094a 154 filter_func=None, mode="", password=None,
1f3fd7b0 155 crypto_key=None, nacl=None,
54f909ca 156 crypto_version=DELTATAR_HEADER_VERSION,
dbee011c 157 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
3a7e1a50 158 logger=None, index_mode=None, index_name_func=None,
0708a374
ERE
159 volume_name_func=None):
160 '''
161 Constructor. Configures the diff engine.
162
163 Parameters:
164 - excluded_files: list of files to exclude in the backup creation or
165 restore operation. It can contain python regular expressions.
166
167 - included_files: list of files to include in the backup creation or
168 restore operation. It can contain python regular expressions. If
169 empty, all files in the source path will be backed up (when creating a
170 backup) or all the files in the backup will be restored (when
a83fa4ed 171 restoring a backup), but if included_files is set then only the files
0708a374
ERE
172 included in the list will be processed.
173
174 - filter_func: custom filter of files to be backed up (or restored).
175 Unused and unset by default. The function receives a file path and
176 must return a boolean.
177
178 - mode: mode in which the delta will be created (when creating a backup)
179 or opened (when restoring). Accepts the same modes as the tarfile
180 library. Valid modes are:
181
da26094a
ERE
182 '' open uncompressed
183 ':' open uncompressed
184 ':gz' open with gzip compression
185 ':bz2' open with bzip2 compression
186 '|' open an uncompressed stream of tar blocks
187 '|gz' open a gzip compressed stream of tar blocks
188 '|bz2' open a bzip2 compressed stream of tar blocks
189 '#gz' open a stream of gzip compressed tar blocks
0708a374 190
1f3fd7b0
PG
191 - crypto_key: used to encrypt and decrypt backups. Encryption will
192 be enabled automatically if a key is supplied. Requires a salt to be
193 passed as well.
194
195 - nacl: salt that was used to derive the encryption key for embedding
196 in the PDTCRYPT header. Not needed when decrypting and when
197 encrypting with password.
198
6e99d23a
PG
199 - password: used to encrypt and decrypt backups. Encryption will be
200 enabled automatically if a password is supplied.
0708a374 201
54f909ca
PG
202 - crypto_version: version of the format, determining the kind of PDT
203 object header.
204
dbee011c
PG
205 - crypto_paramversion: optionally request encryption conforming to
206 a specific parameter version. Defaults to the standard PDT value
207 which as of 2017 is the only one available.
208
0708a374
ERE
209 - logger: python logger object. Optional.
210
3a7e1a50 211 - index_mode: specifies the index mode in the same format as @param
6e99d23a
PG
212 mode, but without the ':', '|' or '#' at the beginning. If encryption
213 is requested it will extend to the auxiliary (index, info) files as
214 well. This is an optional parameter that will automatically mimic
215 @param mode by default if not provided. Valid modes are:
3a7e1a50
ERE
216
217 '' open uncompressed
218 'gz' open with gzip compression
219 'bz2' open with bzip2 compression
0708a374
ERE
220
221 - index_name_func: function that sets a custom name for the index file.
2cc6e32b
PG
222 This function receives a flag to indicate whether the name will be
223 used for a full or diff backup. The backup path will be prepended to
224 its return value.
0708a374
ERE
225
226 - volume_name_func: function that defines the name of tar volumes. It
227 receives the backup_path, whether it is a full backup, and the volume
228 number, and must return the name for the corresponding volume. Optional,
229 DeltaTar has default names for tar volumes.
230 '''
231
da26094a 232 if mode not in self.__file_extensions_dict:
8a54d5dd
PG
233 raise Exception('Unrecognized extension mode=[%s] requested for files'
234 % str(mode))
0708a374
ERE
235
236 self.excluded_files = excluded_files
237 self.included_files = included_files
238 self.filter_func = filter_func
239 self.logger = logging.getLogger('deltatar.DeltaTar')
240 if logger:
241 self.logger.addHandler(logger)
242 self.mode = mode
2ae46844 243
1f3fd7b0
PG
244 if crypto_key is not None:
245 self.crypto_key = crypto_key
246 self.nacl = nacl # encryption only
247
2ae46844
PG
248 if password is not None:
249 self.password = password
3a7e1a50 250
54f909ca
PG
251 if crypto_version is not None:
252 self.crypto_version = crypto_version
253
dbee011c
PG
254 if crypto_paramversion is not None:
255 self.crypto_paramversion = crypto_paramversion
256
3a7e1a50
ERE
257 # generate index_mode
258 if index_mode is None:
259 index_mode = ''
6e99d23a 260 if 'gz' in mode:
3a7e1a50
ERE
261 index_mode = "gz"
262 elif 'bz2' in mode:
263 index_mode = "bz2"
264 elif mode not in self.__index_extensions_dict:
8a54d5dd
PG
265 raise Exception('Unrecognized extension mode=[%s] requested for index'
266 % str(mode))
3a7e1a50
ERE
267
268 self.index_mode = index_mode
0708a374
ERE
269 self.current_time = datetime.datetime.now()
270
271 if index_name_func is not None:
272 self.index_name_func = index_name_func
273
274 if volume_name_func is not None:
275 self.volume_name_func = volume_name_func
276
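# Illustrative example (editor's sketch, not part of the original source):
# constructing a DeltaTar that writes gzip-compressed concatenated volumes and
# encrypts both payload and auxiliary files because a password is given; the
# index_mode is derived automatically from mode ('#gz' -> 'gz').
#
#   import re
#   from deltatar.deltatar import DeltaTar
#
#   dtar = DeltaTar(mode='#gz',
#                   password='example-password',
#                   excluded_files=[re.compile(r'.*\.cache$')])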
e54cfec5 277 def pick_extension(self, kind, mode=None):
2cdd9faf
PG
278 """
279 Choose the extension depending on a) the kind of file given, b) the
280 processing mode, and c) the current encryption settings.
281 """
282 ret = ""
283 if kind == PDT_TYPE_ARCHIVE:
284 ret += ".tar"
e54cfec5
PG
285 if mode is None:
286 mode = self.__index_extensions_dict [self.index_mode]
2cdd9faf 287 ret += mode
a83fa4ed 288 if self.crypto_key is not None or self.password is not None:
2cdd9faf
PG
289 ret += "." + PDTCRYPT_EXTENSION
290 return ret
291
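# Illustrative results (editor's sketch): with self.mode == '#gz' and a
# password or key set, pick_extension(PDT_TYPE_ARCHIVE, '.gz') yields
# '.tar.gz.pdtcrypt' and pick_extension(PDT_TYPE_AUX, '.gz') yields
# '.gz.pdtcrypt'; without encryption the '.pdtcrypt' suffix is omitted.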
f0287fb7 292 def index_name_func(self, is_full): # pylint: disable=method-hidden
0708a374 293 '''
2cc6e32b
PG
294 Callback for setting a custom name for the index file. Depending on
295 whether *is_full* is set, it will create a suitable name for a full
296 or a diff backup.
0708a374
ERE
297 '''
298 prefix = "bfull" if is_full else "bdiff"
f7940c31 299 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf
PG
300 extension = self.pick_extension \
301 (PDT_TYPE_AUX,
302 self.__index_extensions_dict [self.index_mode])
0708a374 303
da26094a 304 return "%s-%s.index%s" % (prefix, date_str, extension)
0708a374 305
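# Illustrative result (editor's sketch): with index_mode 'gz', no encryption
# and current_time 2014-01-01 12:00, index_name_func(True) returns
# 'bfull-2014-01-01-1200.index.gz' and index_name_func(False) returns
# 'bdiff-2014-01-01-1200.index.gz'.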
f0287fb7
CH
306 def volume_name_func(self, backup_path, # pylint: disable=method-hidden
307 is_full, volume_number,
308 guess_name=False):
0708a374
ERE
309 '''
310 function that defines the name of tar volumes. It receives the
311 backup_path, whether it is a full backup, and the volume number, and must
312 return the name for the corresponding volume. Optional, DeltaTar has default
313 names for tar volumes.
df86af81
ERE
314
315 If guess_name is activated, the file is intended not to be created but
316 to be found, and thus the date will be guessed.
0708a374
ERE
317 '''
318 prefix = "bfull" if is_full else "bdiff"
2cdd9faf
PG
319 extension = self.pick_extension \
320 (PDT_TYPE_ARCHIVE,
321 self.__file_extensions_dict [self.mode])
0708a374 322
df86af81 323 if not guess_name:
f7940c31 324 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf 325 return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
df86af81
ERE
326 else:
327 prefix = prefix + "-"
90b75470 328 postfix = "-%03d%s" % (volume_number + 1, extension)
86a6e741
ERE
329 for f in os.listdir(backup_path):
330 if f.startswith(prefix) and f.endswith(postfix):
331 return f
df86af81
ERE
332 raise Exception("volume not found")
333
0708a374 334
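# Illustrative result (editor's sketch): with mode '#gz', no encryption and
# current_time 2014-01-01 12:00, the first volume of a full backup is named
# 'bfull-2014-01-01-1200-001.tar.gz'; with guess_name=True the date part is
# instead matched against the files already present in backup_path.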
974408b5 335 def filter_path(self, path, source_path="", is_dir=None):
8a8fadda
ERE
336 '''
337 Filters a path, given the source_path, using the filtering properties
338 set in the constructor.
339 The filtering order is:
340 1. included_files (if any)
341 2. excluded_files
342 3. filter_func (which must return whether the file is accepted or not)
343 '''
75059f3c 344
c1af2184 345 if len(source_path) > 0:
75059f3c
CH
346 # ensure source_path ends in exactly one separator so that the whole prefix is stripped from path
347 source_path = source_path.rstrip(os.sep) + os.sep
8a8fadda
ERE
348 path = path[len(source_path):]
349
350 # 1. filter included_files
974408b5 351 match = MATCH
8a8fadda 352 if len(self.included_files) > 0:
974408b5 353 match = NO_MATCH
8a8fadda
ERE
354 for i in self.included_files:
355 # it can be either a regexp or a string
be60ffd0 356 if isinstance(i, str):
8a8fadda
ERE
357 # if the string matches, then continue
358 if i == path:
974408b5 359 match = MATCH
c1af2184 360 break
8a8fadda
ERE
361
362 # if the string ends with / it's a directory, and if the
7b07645e 363 # path is contained in it, it is included
c1af2184 364 if i.endswith('/') and path.startswith(i):
974408b5 365 match = MATCH
c1af2184 366 break
8a8fadda
ERE
367
368 # if the string doesn't end with /, add it and do the same
369 # check
c1af2184 370 elif path.startswith(i + '/'):
974408b5 371 match = MATCH
c1af2184 372 break
8a8fadda 373
974408b5
ERE
374 # check for PARENT_MATCH
375 if is_dir:
376 dir_path = path
377 if not dir_path.endswith('/'):
378 dir_path += '/'
379
380 if i.startswith(dir_path):
381 match = PARENT_MATCH
382
8a8fadda
ERE
383 # if it's a reg exp, then we just check if it matches
384 elif isinstance(i, re._pattern_type):
c1af2184 385 if i.match(path):
974408b5 386 match = MATCH
c1af2184 387 break
8a8fadda 388 else:
4bda6f45 389 self.logger.warning('Invalid pattern in included_files: %s' % str(i))
8a8fadda 390
974408b5
ERE
391 if match == NO_MATCH:
392 return NO_MATCH
c1af2184 393
974408b5
ERE
394 # when a directory is in PARENT_MATCH, it doesn't matter if it's
395 # excluded. Its subfiles will be excluded, but the directory itself
396 # won't
397 if match != PARENT_MATCH:
8a8fadda
ERE
398 for e in self.excluded_files:
399 # it can be either a regexp or a string
be60ffd0 400 if isinstance(e, str):
8a8fadda 401 # if the string matches, then exclude
c1af2184 402 if e == path:
974408b5 403 return NO_MATCH
8a8fadda
ERE
404
405 # if the string ends with / it's a directory, and if the
406 # path starts with the directory, then exclude
c1af2184 407 if e.endswith('/') and path.startswith(e):
974408b5 408 return NO_MATCH
8a8fadda
ERE
409
410 # if the string doesn't end with /, do the same check with
411 # the slash added
c1af2184 412 elif path.startswith(e + '/'):
974408b5 413 return NO_MATCH
8a8fadda
ERE
414
415 # if it's a reg exp, then we just check if it matches
c1af2184
ERE
416 elif isinstance(e, re._pattern_type):
417 if e.match(path):
974408b5 418 return NO_MATCH
8a8fadda 419 else:
4bda6f45 420 self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
8a8fadda
ERE
421
422 if self.filter_func:
423 return self.filter_func(path)
424
974408b5 425 return match
8a8fadda 426
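# Illustrative sketch of the filtering order (hypothetical patterns and paths,
# not from the original source):
#
#   import re
#   dtar = DeltaTar(mode='',
#                   included_files=['home/'],
#                   excluded_files=[re.compile(r'.*\.cache$')])
#   dtar.filter_path('/src/home/user/file', '/src')     # -> MATCH
#   dtar.filter_path('/src/home/user/.cache', '/src')   # -> NO_MATCH (excluded)
#   dtar.filter_path('/src/etc/passwd', '/src')         # -> NO_MATCH (not included)
#   dtar.filter_path('/src/home', '/src', is_dir=True)  # -> PARENT_MATCH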
283fbd5e 427 def _recursive_walk_dir(self, source_path, keep_base_dir=False):
0708a374
ERE
428 '''
429 Walk a directory recursively, yielding each file/directory
c059a221
PG
430
431 Returns the path of an entity. If ``keep_base_dir`` is set,
432 the path returned contains the prefix ``source_path``; otherwise it is
433 relative to the prefix.
0708a374
ERE
434 '''
435
283fbd5e 436 source_path = source_path.rstrip(os.sep)
0708a374 437
283fbd5e 438 if keep_base_dir:
adf7dac4 439 beginning_size = 0
283fbd5e
CH
440 else:
441 beginning_size = len(source_path) + 1 # +1 for os.sep
442
443 queue = [source_path]
444
d07c8065 445 while queue:
df86af81 446 cur_path = queue.pop(0)
0708a374 447
e76ca7e0
PG
448 try:
449 dfd = os.open (cur_path, os.O_DIRECTORY)
450 except FileNotFoundError as exn:
451 self.logger.warning ("failed to open entity [%s] as directory; "
452 "file system (error: %s); skipping"
453 % (cur_path, str (exn)))
d86735e4
ERE
454 continue
455
c059a221
PG
456 try:
457 for filename in sorted(os.listdir(dfd)):
458 child = os.path.join(cur_path, filename)
459 is_dir = os.path.isdir(child)
460 status = self.filter_path(child, source_path, is_dir)
461 if status == NO_MATCH:
462 continue
463 if not os.access(child, os.R_OK):
464 self.logger.warning('Error accessing possibly locked file %s' % child)
465 continue
466
467 if status == MATCH:
468 yield child[beginning_size:]
469
470 if is_dir and (status == MATCH or status == PARENT_MATCH):
471 queue.append(child)
472 finally:
473 os.close (dfd)
0708a374 474
e82f14f5
ERE
475 def _stat_dict(self, path):
476 '''
477 Returns a dict with the stat data used to compare files
478 '''
479 stinfo = os.stat(path)
480 mode = stinfo.st_mode
481
482 ptype = None
483 if stat.S_ISDIR(mode):
d07c8065 484 ptype = u'directory'
e82f14f5 485 elif stat.S_ISREG(mode):
d07c8065 486 ptype = u'file'
e82f14f5 487 elif stat.S_ISLNK(mode):
d07c8065 488 ptype = u'link'
e82f14f5
ERE
489
490 return {
d07c8065 491 u'type': ptype,
be60ffd0 492 u'path': path,
d07c8065 493 u'mode': mode,
0501fe0a
ERE
494 u'mtime': int(stinfo.st_mtime),
495 u'ctime': int(stinfo.st_ctime),
d07c8065
ERE
496 u'uid': stinfo.st_uid,
497 u'gid': stinfo.st_gid,
498 u'inode': stinfo.st_ino,
499 u'size': stinfo.st_size
e82f14f5
ERE
500 }
501
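# Illustrative example of the returned dict (editor's sketch; values made up):
#
#   { u'type': u'file', u'path': u'home/user/file', u'mode': 33188,
#     u'mtime': 1388577600, u'ctime': 1388577600, u'uid': 1000, u'gid': 1000,
#     u'inode': 131077, u'size': 4096 }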
df99a044 502 def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
d07c8065
ERE
503 '''
504 Return whether the dicts are equal in the stat keys
505 '''
fc8fdcbc 506 keys = [u'type', u'mode',u'size', u'mtime',
d041935c 507 # not restored: u'inode', u'ctime'
df99a044 508 ]
8adbe50d 509
fc8fdcbc 510 # only check gid/uid if the user is root; otherwise skip the check,
d041935c 511 # because tarfile can only chown when running as superuser
50d70ca9
PG
512 #
513 # also, skip the check in rpmbuild since the sources end up with the
514 # uid:gid of the packager while the extracted files are 0:0.
515 if hasattr(os, "geteuid") and os.geteuid() == 0 \
516 and os.getenv ("RPMBUILD_OPTIONS") is None:
fc8fdcbc
ERE
517 keys.append('gid')
518 keys.append('uid')
519
ea6d3c3e 520 if (not d1 and d2 != None) or (d1 != None and not d2):
8adbe50d
ERE
521 return False
522
cbac9f0b
ERE
523 if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
524 return False
8adbe50d 525
fc8fdcbc
ERE
526 type = d1.get('type', '')
527
d07c8065 528 for key in keys:
fc8fdcbc
ERE
529 # size doesn't matter for directories
530 if type == 'directory' and key == 'size':
531 continue
d07c8065
ERE
532 if d1.get(key, -1) != d2.get(key, -2):
533 return False
534 return True
535
df99a044 536 def prefixed(self, path, listsnapshot_equal=False):
8adbe50d
ERE
537 '''
538 if a path is not prefixed, return it prefixed
539 '''
540 for prefix in self.__path_prefix_list:
541 if path.startswith(prefix):
df99a044
ERE
542 if listsnapshot_equal and prefix == u'list://':
543 return u'snapshot://' + path[len(prefix):]
8adbe50d
ERE
544 return path
545 return u'snapshot://' + path
546
547 def unprefixed(self, path):
548 '''
549 remove a path prefix if any
550 '''
551 for prefix in self.__path_prefix_list:
552 if path.startswith(prefix):
553 return path[len(prefix):]
554 return path
555
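# Illustrative examples (editor's sketch):
#   prefixed('etc/passwd')               -> 'snapshot://etc/passwd'
#   prefixed('list://etc/passwd')        -> 'list://etc/passwd'
#   prefixed('list://etc/passwd', True)  -> 'snapshot://etc/passwd'
#   unprefixed('delete://etc/passwd')    -> 'etc/passwd'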
133d30da
PG
556
557 def initialize_encryption (self, mode):
558 password = self.password
1f3fd7b0
PG
559 key = self.crypto_key
560 nacl = self.nacl
133d30da 561
1f3fd7b0 562 if key is None and password is None:
133d30da
PG
563 return
564 if mode == CRYPTO_MODE_ENCRYPT:
1f3fd7b0
PG
565 return crypto.Encrypt (password=password,
566 key=key,
567 nacl=nacl,
54f909ca 568 version=self.crypto_version,
774ca538 569 paramversion=self.crypto_paramversion)
133d30da 570 if mode == CRYPTO_MODE_DECRYPT:
1f3fd7b0 571 return crypto.Decrypt (password=password, key=key)
133d30da
PG
572
573 raise Exception ("invalid encryption mode [%r]" % mode)
574
575
9eccb1c2 576 def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
3a7e1a50 577 '''
9eccb1c2
PG
578 Given the specified configuration, opens a file for reading or writing,
579 inheriting the encryption and compression settings from the backup.
580 Returns a file object ready to use.
3fdea6d4 581
c8c72fe1
PG
582 :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
583 respectively).
584 :type mode: str
774ca538
PG
585 :param kind: Role of the file, see AUXILIARY_FILE_* constants.
586 Both the info and the index file have a globally
587 unique, constant counter value.
3fdea6d4 588 :type kind: int
3a7e1a50 589 '''
3a7e1a50
ERE
590 if self.index_mode.startswith('gz'):
591 comptype = 'gz'
592 elif self.index_mode.startswith('bz2'):
593 comptype = 'bz2'
594 else:
595 comptype = 'tar'
596
133d30da 597 crypto_ctx = None
6de9444a 598 enccounter = None
133d30da 599 if mode == "w":
774ca538 600 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 601 elif mode == "r":
774ca538 602 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
133d30da 603
3031b7ae
PG
604 if crypto_ctx is not None:
605 if kind == AUXILIARY_FILE_INFO:
606 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
607 elif kind == AUXILIARY_FILE_INDEX:
608 enccounter = crypto.AES_GCM_IV_CNT_INDEX
609 else:
610 raise Exception ("invalid kind of aux file %r" % kind)
611
c8c72fe1 612 sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
3fdea6d4 613 bufsize=tarfile.RECORDSIZE, fileobj=None,
6de9444a 614 encryption=crypto_ctx, enccounter=enccounter)
c8c72fe1
PG
615
616 return sink
617
3a7e1a50 618
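# Illustrative use (editor's sketch mirroring create_full_backup below; the
# path is hypothetical):
#
#   sink = dtar.open_auxiliary_file('/backup/bfull-2014-01-01-1200.index.gz', 'w')
#   sink.write(bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8'))
#   sink.close(close_fileobj=True)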
0708a374 619 def create_full_backup(self, source_path, backup_path,
d4a05db6 620 max_volume_size=None, extra_data=dict()):
0708a374
ERE
621 '''
622 Creates a full backup.
623
624 Parameters:
625 - source_path: source path to the directory to back up.
626 - backup_path: path where the back up will be stored. Backup path will
627 be created if not existent.
d5361dac
ERE
628 - max_volume_size: maximum volume size in megabytes. Used to split the
629 backup in volumes. Optional (won't split in volumes by default).
9eae9a1f
ERE
630 - extra_data: a json-serializable dictionary with information that you
631 want to be included in the header of the index file
0708a374
ERE
632 '''
633 # check input
be60ffd0 634 if not isinstance(source_path, str):
0708a374
ERE
635 raise Exception('Source path must be a string')
636
be60ffd0 637 if not isinstance(backup_path, str):
0708a374
ERE
638 raise Exception('Backup path must be a string')
639
640 if not os.path.exists(source_path) or not os.path.isdir(source_path):
641 raise Exception('Source path "%s" does not exist or is not a '\
642 'directory' % source_path)
643
d07c8065
ERE
644 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
645 max_volume_size < 1):
646 raise Exception('max_volume_size must be a positive integer')
d5361dac
ERE
647 if max_volume_size != None:
648 max_volume_size = max_volume_size*1024*1024
649
9eae9a1f
ERE
650 if not isinstance(extra_data, dict):
651 raise Exception('extra_data must be a dictionary')
652
653 try:
654 extra_data_str = json.dumps(extra_data)
655 except:
656 raise Exception('extra_data is not json-serializable')
657
0708a374
ERE
658 if not os.access(source_path, os.R_OK):
659 raise Exception('Source path "%s" is not readable' % source_path)
660
661 # try to create backup path if needed
37ab0f57 662 os.makedirs(backup_path, exist_ok=True)
0708a374
ERE
663
664 if not os.access(backup_path, os.W_OK):
665 raise Exception('Backup path "%s" is not writeable' % backup_path)
666
667 if source_path.endswith('/'):
668 source_path = source_path[:-1]
669
670 if backup_path.endswith('/'):
671 backup_path = backup_path[:-1]
672
673 # update current time
674 self.current_time = datetime.datetime.now()
675
676 if self.mode not in self.__file_extensions_dict:
677 raise Exception('Unrecognized extension')
678
2ae46844 679 # setup for encrypting payload
774ca538
PG
680 if self.encryptor is None:
681 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
2ae46844 682
0708a374 683 # some initialization
11684b1d 684 self.vol_no = 0
0708a374
ERE
685
686 # generate the first volume name
687 vol_name = self.volume_name_func(backup_path, True, 0)
688 tarfile_path = os.path.join(backup_path, vol_name)
689
774ca538
PG
690 # init index
691 index_name = self.index_name_func(True)
692 index_path = os.path.join(backup_path, index_name)
693 index_sink = self.open_auxiliary_file(index_path, 'w')
e82f14f5 694
d5361dac
ERE
695 cwd = os.getcwd()
696
b7c47f38 697 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
0708a374
ERE
698 '''
699 Handles the new volumes
700 '''
d5361dac
ERE
701 volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
702 volume_path = os.path.join(backup_path, volume_name)
11684b1d 703 deltarobj.vol_no = volume_number
d5361dac
ERE
704
705 # we convert relative paths into absolute because CWD is changed
706 if not os.path.isabs(volume_path):
707 volume_path = os.path.join(cwd, volume_path)
11684b1d 708
8e019196
ERE
709 if tarobj.fileobj is not None:
710 tarobj.fileobj.close()
711
b008f989
ERE
712 deltarobj.logger.debug("opening volume %s" % volume_path)
713
b7c47f38 714 tarobj.open_volume(volume_path, encryption=encryption)
d5361dac
ERE
715
716 # wraps some args from context into the handler
133d30da 717 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
0708a374 718
774ca538 719 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
6c678f3a 720
be60ffd0 721 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
6c678f3a 722 # calculate checksum and write into the stream
c2ffe2ec 723 crc = binascii.crc32(s) & 0xFFFFffff
774ca538 724 index_sink.write(s)
e82f14f5 725
0708a374
ERE
726 # start creating the tarfile
727 tarobj = tarfile.TarFile.open(tarfile_path,
da26094a 728 mode='w' + self.mode,
0708a374 729 format=tarfile.GNU_FORMAT,
d1c38f40 730 concat='#' in self.mode,
133d30da 731 encryption=self.encryptor,
0708a374 732 max_volume_size=max_volume_size,
ea625b04 733 new_volume_handler=new_volume_handler,
e2b59b34
ERE
734 save_to_members=False,
735 dereference=True)
e5c6ca04 736 os.chdir(source_path)
55b8686d
ERE
737
738 # for each file to be in the backup, do:
e82f14f5 739 for path in self._recursive_walk_dir('.'):
3e9b81bb
PG
740
741 try: # backup file
fd743c26
PG
742 # calculate stat dict for current file
743 statd = self._stat_dict(path)
744 statd['path'] = u'snapshot://' + statd['path']
745 statd['volume'] = self.vol_no
746
747 # backup file
3e9b81bb
PG
748 tarobj.add(path, arcname = statd['path'], recursive=False)
749 except FileNotFoundError as exn:
750 # file vanished since the call to access(3) above
751 self.logger.warning ("object [%s] no longer available in "
752 "file system (error: %s); skipping"
753 % (path, str (exn)))
754 continue # prevent indexing
11684b1d 755
55b8686d 756 # retrieve file offset
253d4cdd 757 statd['offset'] = tarobj.get_last_member_offset()
b008f989 758 self.logger.debug("backup %s" % statd['path'])
6c678f3a 759
d041935c 760 # store the stat dict in the index
be60ffd0 761 s = bytes(json.dumps(statd) + '\n', 'UTF-8')
6c678f3a 762 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 763 index_sink.write(s)
e82f14f5 764
be60ffd0 765 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
6c678f3a 766 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 767 index_sink.write(s)
be60ffd0 768 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
774ca538
PG
769 index_sink.write(s)
770
e5c6ca04 771 os.chdir(cwd)
0708a374 772 tarobj.close()
c8c72fe1 773 index_sink.close (close_fileobj=True)
938c2d54 774
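# Illustrative use (editor's sketch; paths are hypothetical):
#
#   dtar = DeltaTar(mode='#gz', password='example-password')
#   dtar.create_full_backup(source_path='/srv/data',
#                           backup_path='/backup/2014-01-01',
#                           max_volume_size=50)   # split into 50 MiB volumes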
0708a374 775 def create_diff_backup(self, source_path, backup_path, previous_index_path,
d4a05db6 776 max_volume_size=None, extra_data=dict()):
0708a374
ERE
777 '''
778 Creates a differential backup.
779
780 Parameters:
781 - source_path: source path to the directory to back up.
782 - backup_path: path where the back up will be stored. Backup path will
783 be created if not existent.
784 - previous_index_path: index of the previous backup, needed to know
785 which files changed since then.
786 - max_volume_size: maximum volume size in megabytes (MB). Used to split
787 the backup in volumes. Optional (won't split in volumes by default).
3a7e1a50
ERE
788
789 NOTE: previous index is assumed to follow exactly the same format as
790 the index_mode setup in the constructor.
0708a374 791 '''
d07c8065 792 # check/sanitize input
be60ffd0 793 if not isinstance(source_path, str):
d07c8065
ERE
794 raise Exception('Source path must be a string')
795
be60ffd0 796 if not isinstance(backup_path, str):
d07c8065
ERE
797 raise Exception('Backup path must be a string')
798
799 if not os.path.exists(source_path) or not os.path.isdir(source_path):
800 raise Exception('Source path "%s" does not exist or is not a '\
801 'directory' % source_path)
802
9eae9a1f
ERE
803 if not isinstance(extra_data, dict):
804 raise Exception('extra_data must be a dictionary')
805
806 try:
807 extra_data_str = json.dumps(extra_data)
808 except:
809 raise Exception('extra_data is not json-serializable')
810
d07c8065
ERE
811 if not os.access(source_path, os.R_OK):
812 raise Exception('Source path "%s" is not readable' % source_path)
813
814 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
815 max_volume_size < 1):
816 raise Exception('max_volume_size must be a positive integer')
817 if max_volume_size != None:
818 max_volume_size = max_volume_size*1024*1024
819
be60ffd0 820 if not isinstance(previous_index_path, str):
d07c8065
ERE
821 raise Exception('previous_index_path must be a string')
822
823 if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
824 raise Exception('Index path "%s" does not exist or is not a '\
825 'file' % previous_index_path)
826
827 if not os.access(previous_index_path, os.R_OK):
828 raise Exception('Index path "%s" is not readable' % previous_index_path)
829
830 # try to create backup path if needed
37ab0f57 831 os.makedirs(backup_path, exist_ok=True)
d07c8065
ERE
832
833 if not os.access(backup_path, os.W_OK):
834 raise Exception('Backup path "%s" is not writeable' % backup_path)
835
836 if source_path.endswith('/'):
837 source_path = source_path[:-1]
838
839 if backup_path.endswith('/'):
840 backup_path = backup_path[:-1]
841
842 # update current time
843 self.current_time = datetime.datetime.now()
844
845 if self.mode not in self.__file_extensions_dict:
846 raise Exception('Unrecognized extension')
847
2ae46844 848 # setup for encrypting payload
774ca538
PG
849 if self.encryptor is None:
850 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 851
d07c8065
ERE
852 # some initialization
853 self.vol_no = 0
854
855 # generate the first volume name
df86af81
ERE
856 vol_name = self.volume_name_func(backup_path, is_full=False,
857 volume_number=0)
d07c8065
ERE
858 tarfile_path = os.path.join(backup_path, vol_name)
859
938c2d54 860 # init index
d07c8065
ERE
861 cwd = os.getcwd()
862
3031b7ae
PG
863 index_name = self.index_name_func(is_full=False)
864 index_path = os.path.join(backup_path, index_name)
865 index_sink = self.open_auxiliary_file(index_path, 'w')
866
d07c8065
ERE
867 def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
868 '''
869 Handles the new volumes
870 '''
df86af81
ERE
871 volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
872 volume_number=volume_number)
d07c8065
ERE
873 volume_path = os.path.join(backup_path, volume_name)
874 deltarobj.vol_no = volume_number
875
876 # we convert relative paths into absolute because CWD is changed
877 if not os.path.isabs(volume_path):
878 volume_path = os.path.join(cwd, volume_path)
879
f624ff3d 880 deltarobj.logger.debug("opening volume %s" % volume_path)
d07c8065
ERE
881 tarobj.open_volume(volume_path)
882
883 # wraps some args from context into the handler
884 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
885
3031b7ae 886 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
d07c8065 887
be60ffd0 888 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
d07c8065 889 # calculate checksum and write into the stream
c2ffe2ec 890 crc = binascii.crc32(s) & 0xFFFFffff
3031b7ae 891 index_sink.write(s)
d07c8065
ERE
892
893 # start creating the tarfile
894 tarobj = tarfile.TarFile.open(tarfile_path,
895 mode='w' + self.mode,
896 format=tarfile.GNU_FORMAT,
d1c38f40 897 concat='#' in self.mode,
133d30da 898 encryption=self.encryptor,
d07c8065 899 max_volume_size=max_volume_size,
ea625b04 900 new_volume_handler=new_volume_handler,
e2b59b34
ERE
901 save_to_members=False,
902 dereference=True)
d07c8065 903
aae127d0
ERE
904
905 # create the iterators, first the previous index iterator, then the
906 # source path directory iterator and collate and iterate them
907 if not os.path.isabs(previous_index_path):
908 previous_index_path = os.path.join(cwd, previous_index_path)
909 index_it = self.iterate_index_path(previous_index_path)
910
d07c8065 911 os.chdir(source_path)
aae127d0
ERE
912 dir_it = self._recursive_walk_dir('.')
913 dir_path_it = self.jsonize_path_iterator(dir_it)
d07c8065 914
df86af81
ERE
915 def pr(path):
916 if not path:
917 return "None"
918 else:
919 return path["path"]
8edb2e3c 920
d07c8065 921 # for each file to be in the backup, do:
df86af81 922 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
aae127d0
ERE
923 action = None
924 # if file is not in the index, it means it's a new file, so we have
925 # to take a snapshot
df86af81 926
aae127d0
ERE
927 if not ipath:
928 action = 'snapshot'
929 # if the file is not in the directory iterator, it means that it has
d041935c 930 # been deleted, so we need to mark it as such
aae127d0
ERE
931 elif not dpath:
932 action = 'delete'
933 # if the file is in both iterators, it means it might have either
934 # not changed (in which case we will just list it in our index but
935 # it will not be included in the tar file), or it might have
e8d95fe5 936 # changed, in which case we will snapshot it.
aae127d0
ERE
937 elif ipath and dpath:
938 if self._equal_stat_dicts(ipath, dpath):
939 action = 'list'
940 else:
941 action = 'snapshot'
942 # TODO: when creating chained backups (i.e. diffing from another
943 # diff), we will need to detect the type of action in the previous
944 # index, because if it was delete and dpath is None, we should
945 # discard the file
946
947 if action == 'snapshot':
948 # calculate stat dict for current file
949 stat = dpath.copy()
be60ffd0 950 stat['path'] = "snapshot://" + dpath['path']
aae127d0
ERE
951 stat['volume'] = self.vol_no
952
50f43227
ERE
953 self.logger.debug("[STORE] %s" % dpath['path'])
954
3e9b81bb
PG
955 try: # backup file
956 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
957 # retrieve file offset
958 stat['offset'] = tarobj.get_last_member_offset()
959 except FileNotFoundError as exn:
960 # file vanished since the call to access(3) above
961 self.logger.warning ("object [%s] no longer available in "
962 "file system (error: %s); skipping"
963 % (dpath ["path"], str (exn)))
964 stat = None # prevent indexing
aae127d0 965
aae127d0 966 elif action == 'delete':
50f43227 967 path = self.unprefixed(ipath['path'])
aae127d0 968 stat = {
50f43227 969 u'path': u'delete://' + path,
aae127d0
ERE
970 u'type': ipath['type']
971 }
50f43227 972 self.logger.debug("[DELETE] %s" % path)
aae127d0
ERE
973
974 # mark it as deleted in the backup
42d39ca7 975 tarobj.add("/dev/null", arcname=stat['path'])
aae127d0
ERE
976 elif action == 'list':
977 stat = dpath.copy()
50f43227
ERE
978 path = self.unprefixed(ipath['path'])
979 stat['path'] = u'list://' + path
aae127d0 980 # unchanged files do not enter in the backup, only in the index
50f43227 981 self.logger.debug("[UNCHANGED] %s" % path)
80910564
TJ
982 else:
983 # should not happen
4bda6f45 984 self.logger.warning('unknown action in create_diff_backup: {0}'
80910564
TJ
985 ''.format(action))
986 stat = None
aae127d0 987
80910564
TJ
988 if stat:
989 # store the stat dict in the index
be60ffd0 990 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
aae127d0 991 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 992 index_sink.write(s)
aae127d0 993
be60ffd0 994 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
aae127d0 995 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 996 index_sink.write(s)
be60ffd0 997 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
3031b7ae 998 index_sink.write(s)
938c2d54 999
df86af81 1000 index_it.release()
aae127d0
ERE
1001 os.chdir(cwd)
1002 tarobj.close()
938c2d54
PG
1003 index_sink.close()
1004
1005
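# Illustrative follow-up (editor's sketch; paths are hypothetical): a diff
# backup against the index written by the previous full backup.
#
#   dtar.create_diff_backup(source_path='/srv/data',
#                           backup_path='/backup/2014-01-02',
#                           previous_index_path='/backup/2014-01-01/bfull-2014-01-01-1200.index.gz')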
d07c8065 1006 def iterate_index_path(self, index_path):
df86af81
ERE
1007 '''
1008 Returns an index iterator. Internally, it uses a classic iterator class.
1009 We do that instead of just yielding so that the iterator object can have
1010 an additional function to close the file descriptor that is opened in
1011 the constructor.
1012 '''
d07c8065 1013
df86af81
ERE
1014 class IndexPathIterator(object):
1015 def __init__(self, delta_tar, index_path):
1016 self.delta_tar = delta_tar
1017 self.index_path = index_path
1018 self.f = None
9eae9a1f 1019 self.extra_data = dict()
df86af81 1020 self.__enter__()
d07c8065 1021
df86af81
ERE
1022 def __iter__(self):
1023 return self
d07c8065 1024
df86af81
ERE
1025 def release(self):
1026 if self.f:
1027 self.f.close()
1028
1029 def __enter__(self):
1030 '''
1031 Allows this iterator to be used with the "with" statement
1032 '''
1033 if self.f is None:
9eccb1c2 1034 self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
df86af81
ERE
1035 # check index header
1036 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1037 if j.get("type", '') != 'python-delta-tar-index' or\
1038 j.get('version', -1) != 1:
1039 raise Exception("invalid index file format: %s" % json.dumps(j))
1040
9eae9a1f
ERE
1041 self.extra_data = j.get('extra_data', dict())
1042
df86af81
ERE
1043 # find BEGIN-FILE-LIST, ignore other headers
1044 while True:
1045 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1046 if j.get('type', '') == 'BEGIN-FILE-LIST':
1047 break
1048 return self
1049
1050 def __exit__(self, type, value, tb):
1051 '''
1052 Allows this iterator to be used with the "with" statement
1053 '''
ec57ce53
ERE
1054 if self.f:
1055 self.f.close()
df86af81 1056 self.f = None
d07c8065 1057
be60ffd0 1058 def __next__(self):
0349168a 1059 # read each file in the index and process it to do the restore
df86af81
ERE
1060 j = {}
1061 l_no = -1
1062 try:
1063 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
be60ffd0 1064 except Exception as e:
df86af81
ERE
1065 if self.f:
1066 self.f.close()
1067 raise e
d07c8065 1068
df86af81 1069 op_type = j.get('type', '')
d07c8065 1070
df86af81
ERE
1071 # when we detect the end of the list, break the loop
1072 if op_type == 'END-FILE-LIST':
1073 if self.f:
1074 self.f.close()
1075 raise StopIteration
1076
1077 # check input
1078 if op_type not in ['directory', 'file', 'link']:
4bda6f45 1079 self.delta_tar.logger.warning('unrecognized type to be '
df86af81
ERE
1080 'restored: %s, line %d' % (op_type, l_no))
1081 # iterate again
be60ffd0 1082 return self.__next__()
df86af81
ERE
1083
1084 return j, l_no
d07c8065 1085
df86af81 1086 return IndexPathIterator(self, index_path)
d07c8065 1087
26fdd428 1088 def iterate_tar_path(self, tar_path, new_volume_handler=None):
24ddf0a2
ERE
1089 '''
1090 Returns a tar iterator that iterates jsonized member items that contain
1091 an additional "member" field, used by RestoreHelper.
1092 '''
ec57ce53 1093 class TarPathIterator(object):
83a81852 1094 def __init__(self, delta_tar, tar_path, new_volume_handler=None):
24ddf0a2 1095 self.delta_tar = delta_tar
ec57ce53 1096 self.tar_path = tar_path
24ddf0a2 1097 self.tar_obj = None
6bca471c 1098 self.last_member = None
26fdd428 1099 self.new_volume_handler = new_volume_handler
24ddf0a2
ERE
1100 self.__enter__()
1101
1102 def __iter__(self):
1103 return self
1104
1105 def release(self):
1106 if self.tar_obj:
1107 self.tar_obj.close()
1108
1109 def __enter__(self):
1110 '''
1111 Allows this iterator to be used with the "with" statement
1112 '''
1113 if self.tar_obj is None:
d5e1d60f
PG
1114 decryptor = None
1115 if self.delta_tar.password is not None:
1f3fd7b0
PG
1116 decryptor = crypto.Decrypt \
1117 (password=self.delta_tar.password,
1118 key=self.delta_tar.crypto_key)
ec57ce53
ERE
1119 self.tar_obj = tarfile.TarFile.open(self.tar_path,
1120 mode='r' + self.delta_tar.mode,
1121 format=tarfile.GNU_FORMAT,
d1c38f40 1122 concat='#' in self.delta_tar.mode,
d5e1d60f 1123 encryption=decryptor,
83a81852 1124 new_volume_handler=self.new_volume_handler,
e2b59b34
ERE
1125 save_to_members=False,
1126 dereference=True)
24ddf0a2
ERE
1127 return self
1128
1129 def __exit__(self, type, value, tb):
1130 '''
1131 Allows this iterator to be used with the "with" statement
1132 '''
ec57ce53
ERE
1133 if self.tar_obj:
1134 self.tar_obj.close()
24ddf0a2
ERE
1135 self.tar_obj = None
1136
be60ffd0 1137 def __next__(self):
24ddf0a2
ERE
1138 '''
1139 Read each member and return it as a stat dict
1140 '''
be60ffd0 1141 tarinfo = self.tar_obj.__iter__().__next__()
8e019196
ERE
1142 # NOTE: here we compare if tarinfo.path is the same as before
1143 # instead of comparing the tarinfo object itself because the
1144 # object itself might change for multivol tarinfos
1145 if tarinfo is None or (self.last_member is not None and\
1146 self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
ec57ce53
ERE
1147 raise StopIteration
1148
6bca471c
ERE
1149 self.last_member = tarinfo
1150
24ddf0a2
ERE
1151 ptype = 'unknown'
1152 if tarinfo.isfile():
1153 ptype = 'file'
1154 elif tarinfo.isdir():
ab7e7465 1155 ptype = 'directory'
24ddf0a2
ERE
1156 elif tarinfo.islnk() or tarinfo.issym():
1157 ptype = 'link'
1158
1159 return {
1160 u'type': ptype,
1161 u'path': tarinfo.path,
1162 u'mode': tarinfo.mode,
1163 u'mtime': tarinfo.mtime,
1164 u'ctime': -1, # cannot restore
1165 u'uid': tarinfo.uid,
1166 u'gid': tarinfo.gid,
1167 u'inode': -1, # cannot restore
1168 u'size': tarinfo.size,
1169 u'member': tarinfo
ec57ce53
ERE
1170 }, 0
1171
26fdd428 1172 return TarPathIterator(self, tar_path, new_volume_handler)
24ddf0a2 1173
df99a044 1174 def jsonize_path_iterator(self, iter, strip=0):
d07c8065
ERE
1175 '''
1176 converts the yielded items of an iterator into json path lines.
df99a044
ERE
1177
1178 strip: Strip the smallest prefix containing ``strip`` leading slashes from
1179 the file path.
d07c8065
ERE
1180 '''
1181 while True:
1182 try:
be60ffd0 1183 path = iter.__next__()
df99a044 1184 if strip == 0:
4ac6d333 1185 yield self._stat_dict(path), 0
df99a044
ERE
1186 else:
1187 st = self._stat_dict(path)
1188 st['path'] = "/".join(path.split("/")[strip:])
4ac6d333 1189 yield st, 0
d07c8065
ERE
1190 except StopIteration:
1191 break
1192
b84beea7
PG
1193 def iterate_disaster_index (self, index):
1194 """
1195 Mimic the behavior of the other object iterators, just with the inputs
1196 supplied directly as *index*.
1197 """
1198
1199 class RawIndexIterator(object):
65b35c42 1200 def __init__(self, delta_tar, index):
b84beea7
PG
1201 self.delta_tar = delta_tar
1202 self.index = index
1203 self.__enter__()
1204
1205 def __iter__(self):
1206 return self
1207
1208 def release(self):
65b35c42 1209 pass
b84beea7
PG
1210
1211 def __enter__(self):
1212 '''
1213 Allows this iterator to be used with the "with" statement
1214 '''
1215 self.iter = self.index.__iter__ ()
1216 return self
1217
1218 def __exit__(self, type, value, tb):
1219 '''
1220 Allows this iterator to be used with the "with" statement
1221 '''
1222
1223 def __next__(self):
1224 idxent = self.iter.__next__ ()
65b35c42 1225 return idxent, 0
b84beea7
PG
1226
1227 return RawIndexIterator(self, index)
1228
d07c8065
ERE
1229 def collate_iterators(self, it1, it2):
1230 '''
1231 Collate two iterators, so that it returns pairs of the items of each
1232 iterator (if the items are the same), or (None, elem2) or (elem1, None)
1233 when there's no match for the items in the other iterator.
1234
1235 It assumes that the items in both lists are ordered in the same way.
1236 '''
ea6d3c3e 1237 l_no = 0
d07c8065
ERE
1238 elem1, elem2 = None, None
1239 while True:
1240 if not elem1:
1241 try:
be60ffd0 1242 elem1, l_no = it1.__next__()
d07c8065
ERE
1243 except StopIteration:
1244 if elem2:
ea6d3c3e 1245 yield (None, elem2, l_no)
d07c8065 1246 for elem2 in it2:
ea6d3c3e
ERE
1247 if isinstance(elem2, tuple):
1248 elem2 = elem2[0]
1249 yield (None, elem2, l_no)
d07c8065 1250 break
d07c8065
ERE
1251 if not elem2:
1252 try:
be60ffd0 1253 elem2 = it2.__next__()
d07c8065
ERE
1254 if isinstance(elem2, tuple):
1255 elem2 = elem2[0]
1256 except StopIteration:
1257 if elem1:
ea6d3c3e 1258 yield (elem1, None, l_no)
df99a044 1259 for elem1, l_no in it1:
ea6d3c3e 1260 yield (elem1, None, l_no)
d07c8065 1261 break
670f9934
ERE
1262
1263 index1 = self.unprefixed(elem1['path'])
1264 index2 = self.unprefixed(elem2['path'])
1265 i1, i2 = self.compare_indexes(index1, index2)
1266
1267 yield1 = yield2 = None
1268 if i1 is not None:
1269 yield1 = elem1
1270 elem1 = None
1271 if i2 is not None:
1272 yield2 = elem2
1273 elem2 = None
1274 yield (yield1, yield2, l_no)
1275
1276 def compare_indexes(self, index1, index2):
1277 '''
1278 Compare iterator indexes and return a tuple in the following form:
1279 if index1 < index2, returns (index1, None)
1280 if index1 == index2 returns (index1, index2)
1281 else: returns (None, index2)
1282 '''
1283 l1 = index1.split('/')
1284 l2 = index2.split('/')
1285 length = len(l2) - len(l1)
1286
1287 if length > 0:
1288 return (index1, None)
1289 elif length < 0:
1290 return (None, index2)
1291
1292 for i1, i2 in zip(l1, l2):
1293 if i1 < i2:
1294 return (index1, None)
1295 elif i1 > i2:
1296 return (None, index2)
1297
1298 return (index1, index2)
0708a374 1299
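# Illustrative comparisons (editor's sketch; paths already unprefixed):
#   compare_indexes('etc', 'etc/passwd')         -> ('etc', None)
#   compare_indexes('etc/passwd', 'etc/passwd')  -> ('etc/passwd', 'etc/passwd')
#   compare_indexes('usr/bin', 'etc/passwd')     -> (None, 'etc/passwd')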
8c65a2b1 1300 def list_backup(self, backup_tar_path, list_func=None):
be60ffd0 1301 if not isinstance(backup_tar_path, str):
8c65a2b1
ERE
1302 raise Exception('Backup tar path must be a string')
1303
1304 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1305 raise Exception('Source path "%s" does not exist or is not a '\
1306 'file' % backup_tar_path)
1307
1308 if not os.access(backup_tar_path, os.R_OK):
1309 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1310
1311 cwd = os.getcwd()
1312
b7c47f38 1313 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
8c65a2b1
ERE
1314 '''
1315 Handles the new volumes
1316 '''
1317 volume_name = deltarobj.volume_name_func(backup_path, True,
1318 volume_number, guess_name=True)
1319 volume_path = os.path.join(backup_path, volume_name)
1320
1321 # we convert relative paths into absolute because CWD is changed
1322 if not os.path.isabs(volume_path):
1323 volume_path = os.path.join(cwd, volume_path)
b7c47f38
PG
1324 tarobj.open_volume(volume_path, encryption=encryption)
1325
774ca538
PG
1326 if self.decryptor is None:
1327 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
8c65a2b1
ERE
1328
1329 backup_path = os.path.dirname(backup_tar_path)
1330 if not os.path.isabs(backup_path):
1331 backup_path = os.path.join(cwd, backup_path)
133d30da 1332 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)
b7a6566b 1333
8c65a2b1
ERE
1334 tarobj = tarfile.TarFile.open(backup_tar_path,
1335 mode='r' + self.mode,
1336 format=tarfile.GNU_FORMAT,
d1c38f40 1337 concat='#' in self.mode,
133d30da 1338 encryption=self.decryptor,
ea625b04 1339 new_volume_handler=new_volume_handler,
e2b59b34
ERE
1340 save_to_members=False,
1341 dereference=True)
8c65a2b1
ERE
1342
1343 def filter(cls, list_func, tarinfo):
1344 if list_func is None:
b008f989 1345 self.logger.info(tarinfo.path)
8c65a2b1
ERE
1346 else:
1347 list_func(tarinfo)
1348 return False
1349 filter = partial(filter, self, list_func)
1350
1351 tarobj.extractall(filter=filter)
1352 tarobj.close()
1353
0708a374 1354 def restore_backup(self, target_path, backup_indexes_paths=[],
e93f83f1 1355 backup_tar_path=None, restore_callback=None,
b84beea7 1356 disaster=tarfile.TOLERANCE_STRICT, backup_index=None):
0708a374
ERE
1357 '''
1358 Restores a backup.
1359
1360 Parameters:
0708a374
ERE
1361 - target_path: path to restore.
1362 - backup_indexes_paths: path to backup indexes, in descending date order.
1363 The indexes indicate the location of their respective backup volumes,
1364 and multiple indexes are needed to be able to restore diff backups.
1365 Note that this is an optional parameter: if not supplied, it will
1366 try to restore directly from backup_tar_path.
1367 - backup_tar_path: path to the backup tar file. Used as an alternative
1368 to backup_indexes_paths to restore directly from a tar file without
1369 using any file index. If it's a multivol tarfile, volume_name_func
1370 will be called.
4da27cfe 1371 - restore_callback: callback function to be called during restore.
b0aef801 1372 This is passed to the helper and gets called for every file.
11684b1d 1373
3a7e1a50 1374 NOTE: If you want to use an index to restore a backup, this function
11684b1d
ERE
1375 only supports doing so when the tarfile mode is either uncompressed or
1376 uses concat compression mode, because otherwise it would be very slow.
3a7e1a50
ERE
1377
1378 NOTE: Indices are assumed to follow the same format as the index_mode
1379 specified in the constructor.
e93f83f1
PG
1380
1381 Returns the list of files that could not be restored, if there were
1382 any.
0708a374 1383 '''
11684b1d 1384 # check/sanitize input
be60ffd0 1385 if not isinstance(target_path, str):
e5c6ca04
ERE
1386 raise Exception('Target path must be a string')
1387
11684b1d
ERE
1388 if not backup_indexes_paths and not backup_tar_path:
1389 raise Exception("You have to either provide index paths or a tar path")
e5c6ca04 1390
b84beea7
PG
1391 if isinstance (backup_index, list) is True:
1392 mode = "disaster"
1393 elif len(backup_indexes_paths) == 0:
ea6d3c3e
ERE
1394 mode = "tar"
1395 else:
1396 mode = "diff"
1397
1398 if mode == "tar":
be60ffd0 1399 if not isinstance(backup_tar_path, str):
11684b1d
ERE
1400 raise Exception('Backup tar path must be a string')
1401
1402 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1403 raise Exception('Source path "%s" does not exist or is not a '\
1404 'file' % backup_tar_path)
1405
1406 if not os.access(backup_tar_path, os.R_OK):
1407 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1408 else:
1409 if not isinstance(backup_indexes_paths, list):
1410 raise Exception('backup_indexes_paths must be a list')
1411
1412 if self.mode.startswith(':') or self.mode.startswith('|'):
1413 raise Exception('Restore only supports either uncompressed tars'
1414 ' or concat compression when restoring from an index, and '
1415 ' the open mode you provided is "%s"' % self.mode)
1416
1417 for index in backup_indexes_paths:
be60ffd0 1418 if not isinstance(index, str):
11684b1d 1419 raise Exception('indices must be strings')
e5c6ca04 1420
11684b1d
ERE
1421 if not os.path.exists(index) or not os.path.isfile(index):
1422 raise Exception('Index path "%s" does not exist or is not a '\
1423 'file' % index)
1424
1425 if not os.access(index, os.R_OK):
1426 raise Exception('Index path "%s" is not readable' % index)
e5c6ca04
ERE
1427
1428 # try to create backup path if needed
37ab0f57 1429 os.makedirs(target_path, exist_ok=True)
e5c6ca04 1430
ec57ce53
ERE
1431 # make backup_tar_path absolute so that iterate_tar_path works fine
1432 if backup_tar_path and not os.path.isabs(backup_tar_path):
1433 backup_tar_path = os.path.abspath(backup_tar_path)
1434
d5361dac 1435 cwd = os.getcwd()
ec57ce53 1436 os.chdir(target_path)
d5361dac 1437
2ae46844 1438 # setup for decrypting payload
774ca538
PG
1439 if self.decryptor is None:
1440 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
2ae46844 1441
ea6d3c3e 1442 if mode == 'tar':
24ddf0a2
ERE
1443 index_it = self.iterate_tar_path(backup_tar_path)
1444 helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
ec57ce53 1445 tarobj=index_it.tar_obj)
ea6d3c3e 1446 elif mode == "diff":
04f4c7ab
PG
1447 helper = RestoreHelper(self, cwd, backup_indexes_paths,
1448 disaster=disaster)
f3d10816
PG
1449 try:
1450 # get iterator from newest index at _data[0]
1451 index1 = helper._data[0]["path"]
1452 index_it = self.iterate_index_path(index1)
1453 except tarfile.DecryptionError as exn:
1454 self.logger.error("failed to decrypt file [%s]: %s; is this an "
afc87ebc
PG
1455 "actual encrypted index file?"
1456 % (index1, str (exn)))
1457 return [(index1, exn)]
1458 except Exception as exn:
1459 # compressed files
1460 self.logger.error("failed to read file [%s]: %s; is this an "
1461 "actual index file?" % (index1, str (exn)))
f3d10816 1462 return [(index1, exn)]
b84beea7
PG
1463 elif mode == "disaster":
1464 index_it = self.iterate_disaster_index (backup_index)
65b35c42
PG
1465 helper = RestoreHelper (self, cwd, backup_path=backup_tar_path,
1466 backup_index=backup_index,
1467 disaster=disaster)
b84beea7 1468
d07c8065 1469
24ddf0a2
ERE
1470 dir_it = self._recursive_walk_dir('.')
1471 dir_path_it = self.jsonize_path_iterator(dir_it)
11684b1d 1472
e93f83f1
PG
1473 failed = [] # irrecoverable files
1474
a395759e 1475 # for each file to be restored, do:
24ddf0a2
ERE
1476 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
1477 if not ipath:
1478 upath = dpath['path']
1479 op_type = dpath['type']
1480 else:
1481 upath = self.unprefixed(ipath['path'])
1482 op_type = ipath['type']
42c04ead 1483
24ddf0a2 1484 # filter paths
75059f3c 1485 if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
24ddf0a2 1486 continue
ea6d3c3e 1487
24ddf0a2
ERE
1488 # if types of the file mismatch, the file needs to be deleted
1489 # and re-restored
1490 if ipath is not None and dpath is not None and\
1491 dpath['type'] != ipath['type']:
1492 helper.delete(upath)
1493
1494 # if file not found in dpath, we can directly restore from index
1495 if not dpath:
1496 # if the file doesn't exist and it needs to be deleted, it
1497 # means that work is already done
1498 if ipath['path'].startswith('delete://'):
ea6d3c3e 1499 continue
24ddf0a2 1500 try:
b008f989 1501 self.logger.debug("restore %s" % ipath['path'])
4da27cfe 1502 helper.restore(ipath, l_no, restore_callback)
be60ffd0 1503 except Exception as e:
e93f83f1 1504 iipath = ipath.get ("path", "")
7b07645e 1505 self.logger.error("FAILED to restore: {} ({})"
e93f83f1 1506 .format(iipath, e))
04f4c7ab 1507 if disaster != tarfile.TOLERANCE_STRICT:
e93f83f1 1508 failed.append ((iipath, e))
24ddf0a2 1509 continue
11684b1d 1510
24ddf0a2
ERE
1511 # if both files are equal, we have nothing to restore
1512 if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
1513 continue
1514
1515 # we have to restore the file, but first we need to delete the
1516 # current existing file.
1517 # we don't delete the file if it's a directory, because it might
1518 # just have changed mtime, so it's quite inefficient to remove
1519 # it
1520 if ipath:
1521 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
42c04ead 1522 helper.delete(upath)
b008f989 1523 self.logger.debug("restore %s" % ipath['path'])
e93f83f1
PG
1524 try:
1525 helper.restore(ipath, l_no, restore_callback)
1526 except Exception as e:
04f4c7ab 1527 if disaster == tarfile.TOLERANCE_STRICT:
e93f83f1
PG
1528 raise
1529 failed.append ((ipath.get ("path", ""), e))
1530 continue
24ddf0a2
ERE
1531
1532 # if the file is not in the index (so it comes from the target
1533 # directory) then we have to delete it
1534 else:
c9d47a03 1535 self.logger.debug("delete %s" % upath)
24ddf0a2 1536 helper.delete(upath)
42c04ead 1537
ec57ce53
ERE
1538 helper.restore_directories_permissions()
1539 index_it.release()
1540 os.chdir(cwd)
1541 helper.cleanup()
ea6d3c3e 1542
e93f83f1
PG
1543 return failed
1544
1545
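# A minimal usage sketch of the diff-mode branch above; not part of this
# module.  The paths, the "#gz" mode and the constructor arguments are
# illustrative assumptions.  The index list must be ordered newest-first
# (diff index before full index); restore_backup() returns a list of
# (path, exception) tuples, which stays empty under the default strict
# tolerance because errors are raised instead.
from deltatar.deltatar import DeltaTar

dtar = DeltaTar(mode="#gz")
failed = dtar.restore_backup(
    "/tmp/restore-target",
    backup_indexes_paths=["/backups/diff/index.gz",    # newest (diff) first
                          "/backups/full/index.gz"])   # full backup last
for path, exc in failed:
    print("could not restore %s: %s" % (path, exc))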
1546 def recover_backup(self, target_path, backup_indexes_paths=[],
1547 restore_callback=None):
1548 """
1549 Walk the index, extracting objects in disaster mode. Bad files are
1550 reported along with a reason.
1551 """
1552 return self.restore_backup(target_path,
1553 backup_indexes_paths=backup_indexes_paths,
04f4c7ab
PG
1554 disaster=tarfile.TOLERANCE_RECOVER)
1555
1556
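# Continuing the sketch above (dtar as before, paths illustrative):
# recover_backup() drives the same restore with TOLERANCE_RECOVER, so
# damaged members are skipped and reported instead of aborting the run.
bad = dtar.recover_backup("/tmp/restore-target",
                          backup_indexes_paths=["/backups/diff/index.gz",
                                                "/backups/full/index.gz"])
for path, reason in bad:
    print("skipped %s: %s" % (path, reason))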
6690f5e0 1557 def rescue_backup(self, target_path, backup_tar_path,
04f4c7ab
PG
1558 restore_callback=None):
1559 """
1560 More aggressive “unfsck” mode: do not rely on the index data as the
1561 files may be corrupt; skim files for header-like information and
1562 attempt to retrieve the data.
1563 """
27ee4dd4
PG
1564 def gen_volume_name (nvol):
1565 return os.path.join (os.path.dirname (backup_tar_path),
1566 self.volume_name_func (backup_tar_path,
1567 True,
1568 nvol))
1569
1570 backup_index = tarfile.gen_rescue_index (gen_volume_name,
1571 self.mode,
1572 password=self.password,
1573 key=self.crypto_key)
6690f5e0 1574
04f4c7ab 1575 return self.restore_backup(target_path,
b84beea7 1576 backup_index=backup_index,
65b35c42 1577 backup_tar_path=backup_tar_path,
04f4c7ab 1578 disaster=tarfile.TOLERANCE_RESCUE)
e93f83f1
PG
1579
1580
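# Continuing the sketch (dtar as before, the volume file name is made up):
# rescue_backup() ignores any existing index, rebuilds one by scanning the
# volumes named via volume_name_func(), and restores with TOLERANCE_RESCUE.
bad = dtar.rescue_backup("/tmp/restore-target",
                         "/backups/full/backup-full.tar.gz")
for path, reason in bad:
    print("unrecoverable %s: %s" % (path, reason))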
11684b1d
ERE
1581 def _parse_json_line(self, f, l_no):
1582 '''
ee0e095f 1583        Read a line from a file-like object and parse it as JSON.
11684b1d
ERE
1584 '''
1585 l = f.readline()
1586 l_no += 1
1587 try:
be60ffd0 1588 j = json.loads(l.decode('UTF-8'))
ee0e095f
PG
1589 except UnicodeDecodeError as e:
1590 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1591 raise Exception \
1592 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1593 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1594 from e
1595 raise Exception \
1596 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1597 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1598 from e
be60ffd0 1599 except ValueError as e:
11684b1d
ERE
1600 raise Exception("error parsing this json line "
1601 "(line number %d): %s" % (l_no, l))
1602 return j, l_no
ea6d3c3e 1603
24ddf0a2 1604
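# A small sketch of the line-counter threading in _parse_json_line() (dtar
# as in the sketches above); the two index entries are simplified and only
# illustrative, real index lines carry additional fields.
import io

index_lines = io.BytesIO(
    b'{"type": "directory", "path": "snapshot://./docs", "volume": 0}\n'
    b'{"type": "file", "path": "snapshot://./docs/a.txt", "volume": 0}\n')
l_no = 0
entry, l_no = dtar._parse_json_line(index_lines, l_no)   # l_no is now 1
entry, l_no = dtar._parse_json_line(index_lines, l_no)   # l_no is now 2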
ea6d3c3e
ERE
1605class RestoreHelper(object):
1606 '''
1607 Class used to help to restore files from indices
1608 '''
1609
1610 # holds the dicts of data
1611 _data = []
1612
1613 _deltatar = None
1614
1615 _cwd = None
1616
0501fe0a
ERE
1617 # list of directories to be restored. This is done as a last step, see
1618 # tarfile.extractall for details.
1619 _directories = []
1620
04f4c7ab 1621 _disaster = tarfile.TOLERANCE_STRICT
e93f83f1 1622
037994ca 1623 def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
65b35c42
PG
1624 backup_index=None, tarobj=None,
1625 disaster=tarfile.TOLERANCE_STRICT):
ea6d3c3e
ERE
1626 '''
1627 Constructor opens the tars and init the data structures.
1628
037994ca
PG
1629 Assumptions:
1630
1631 - Index list must be provided in reverse order (newer first).
1632 - “newer first” apparently means that if there are n backups
1633 provided, the last full backup is at index n-1 and the most recent
1634 diff backup is at index 0.
1635 - Only the first, the second, and the last elements of
1636 ``index_list`` are relevant, others will not be accessed.
1637 - If no ``index_list`` is provided, both ``tarobj`` and
1638 ``backup_path`` must be passed.
1639 - If ``index_list`` is provided, the values of ``tarobj`` and
1640 ``backup_path`` are ignored.
ea6d3c3e
ERE
1641 '''
1642 self._data = []
0501fe0a 1643 self._directories = []
ea6d3c3e
ERE
1644 self._deltatar = deltatar
1645 self._cwd = cwd
3031b7ae 1646 self._password = deltatar.password
1f3fd7b0 1647 self._crypto_key = deltatar.crypto_key
3031b7ae 1648 self._decryptors = []
e93f83f1 1649 self._disaster = disaster
ea6d3c3e 1650
253d4cdd
ERE
1651 try:
1652 import grp, pwd
1653 except ImportError:
1654 grp = pwd = None
1655
1656 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1657 self.canchown = True
1658 else:
1659 self.canchown = False
1660
65b35c42 1661 if isinstance (backup_index, list) is True:
001bd488 1662 decryptor = self._deltatar.decryptor
65b35c42
PG
1663 self._data = \
1664 [{ "curr_vol_no" : None
1665 , "vol_fd" : None
1666 , "offset" : -1
1667 , "tarobj" : None
1668 , "path" : backup_path
1669 , "is_full" : True
1670 , "iterator" : None
1671 , "last_itelement" : None
1672 , "last_lno" : 0
001bd488
PG
1673 , "new_volume_handler" :
1674 partial(self.new_volume_handler,
1675 self._deltatar, self._cwd, True,
1676 os.path.dirname(backup_path), decryptor)
1677 , "decryptor" : decryptor
65b35c42
PG
1678 }]
1679 elif index_list is not None:
24ddf0a2 1680 for index in index_list:
037994ca 1681 is_full = index == index_list[-1]
24ddf0a2 1682
d5e1d60f 1683 decryptor = None
3031b7ae 1684 if self._password is not None:
1f3fd7b0
PG
1685 decryptor = crypto.Decrypt (password=self._password,
1686 key=self._crypto_key)
d5e1d60f 1687
24ddf0a2
ERE
1688 # make paths absolute to avoid cwd problems
1689 if not os.path.isabs(index):
1690 index = os.path.normpath(os.path.join(cwd, index))
1691
1692 s = dict(
1693 curr_vol_no = None,
1694 vol_fd = None,
1695 offset = -1,
1696 tarobj = None,
1697 path = index,
1698 is_full = is_full,
1699 iterator = None,
1700 last_itelement = None,
1701 last_lno = 0,
1702 new_volume_handler = partial(self.new_volume_handler,
1703 self._deltatar, self._cwd, is_full,
d5e1d60f
PG
1704 os.path.dirname(index), decryptor),
1705 decryptor = decryptor
24ddf0a2
ERE
1706 )
1707 self._data.append(s)
1708 else:
ea6d3c3e 1709 # make paths absolute to avoid cwd problems
24ddf0a2
ERE
1710 if not os.path.isabs(backup_path):
1711 backup_path = os.path.normpath(os.path.join(cwd, backup_path))
ea6d3c3e 1712
ec57ce53
ERE
1713 # update the new_volume_handler of tar_obj
1714 tarobj.new_volume_handler = partial(self.new_volume_handler,
b7c47f38 1715 self._deltatar, self._cwd, True, os.path.dirname(backup_path),
133d30da 1716 self._deltatar.decryptor)
ea6d3c3e
ERE
1717 s = dict(
1718 curr_vol_no = None,
1719 vol_fd = None,
1720 offset = -1,
24ddf0a2
ERE
1721 tarobj = tarobj,
1722 path = backup_path,
1723 is_full = True,
670f9934
ERE
1724 iterator = None,
1725 last_itelement = None,
1726 last_lno = 0,
d5e1d60f
PG
1727 new_volume_handler = tarobj.new_volume_handler,
1728 decryptor = self._deltatar.decryptor
ea6d3c3e
ERE
1729 )
1730 self._data.append(s)
1731
3031b7ae 1732
ea6d3c3e
ERE
1733 def cleanup(self):
1734 '''
1735 Closes all open files
1736 '''
1737 for data in self._data:
55b2ffd0
ERE
1738 if data['vol_fd']:
1739 data['vol_fd'].close()
1740 data['vol_fd'] = None
ea6d3c3e
ERE
1741 if data['tarobj']:
1742 data['tarobj'].close()
1743 data['tarobj'] = None
ea6d3c3e
ERE
1744
1745 def delete(self, path):
1746 '''
1747 Delete a file
1748 '''
df99a044
ERE
1749 if not os.path.exists(path):
1750 return
1751
24ddf0a2 1752 # to preserve parent directory mtime, we save it
283fbd5e 1753 parent_dir = os.path.dirname(path) or os.getcwd()
24ddf0a2
ERE
1754 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1755
561bc39f 1756 if os.path.isdir(path) and not os.path.islink(path):
ea6d3c3e
ERE
1757 shutil.rmtree(path)
1758 else:
1759 os.unlink(path)
1760
24ddf0a2
ERE
1761 # now we restore parent_directory mtime
1762 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1763
4da27cfe 1764 def restore(self, itpath, l_no, callback=None):
ea6d3c3e 1765 '''
8a54d5dd 1766 Restore the path from the appropriate backup. Receives the current path
e8d95fe5 1767        from the newest (=first) index iterator. itpath must not be None.
b0aef801 1768 callback is a custom function that gets called for every file.
037994ca
PG
1769
1770 NB: This function takes the attribute ``_data`` as input but will only
1771 ever use its first and, if available, second element. Anything else in
1772 ``._data[]`` will be ignored.
ea6d3c3e 1773 '''
ea6d3c3e
ERE
1774 path = itpath['path']
1775
4da27cfe
SA
1776 # Calls the callback function
1777 if callback:
1778 callback()
1779
ea6d3c3e 1780 if path.startswith('delete://'):
df86af81
ERE
1781            # the file has already been deleted in restore_backup in all
1782            # cases, so there is nothing left to do here
ea6d3c3e 1783 return
df86af81 1784
e8d95fe5 1785 # get data from newest index (_data[0])
df86af81
ERE
1786 data = self._data[0]
1787 upath = self._deltatar.unprefixed(path)
1788
24ddf0a2 1789 # to preserve parent directory mtime, we save it
283fbd5e 1790 parent_dir = os.path.dirname(upath) or os.getcwd()
37ab0f57 1791 os.makedirs(parent_dir, exist_ok=True)
24ddf0a2
ERE
1792 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1793
e8d95fe5 1794        # if the newest index lists the path as a snapshot, deal with it here
df86af81
ERE
1795 # and finish
1796 if path.startswith('snapshot://'):
65b35c42 1797 self.restore_file(itpath, data, path, l_no, upath)
24ddf0a2
ERE
1798
1799 # now we restore parent_directory mtime
1800 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
ea6d3c3e
ERE
1801 return
1802
1803 # we go from index to index, finding the path in the index, then finding
1804 # the index with the most recent snapshot of the file being restored
e8d95fe5
TJ
1805 #
1806 # Right now we support diff backups, only. No incremental backups.
1807 # As a result _data[0] is always the diff backup index
1808 # and _data[1] the full backup index.
527670c4 1809 if len(self._data) == 2:
7273719c 1810 data = self._data[1]
527670c4
TJ
1811 d, l_no, dpath = self.find_path_in_index(data, upath)
1812 if not d:
1813 self._deltatar.logger.warning('Error restoring file %s from '
1814 'index, not found in index %s' % (path, data['path']))
1815 return
1816
1817 cur_path = d.get('path', '')
1818 if cur_path.startswith('delete://'):
1819 self._deltatar.logger.warning(('Strange thing happened, file '
1820 '%s was listed in first index but deleted by another '
1821 'one. Path was ignored and untouched.') % path)
1822 return
1823 elif cur_path.startswith('snapshot://'):
1824 # this code path is reached when the file is unchanged
1825 # in the newest index and therefore of type 'list://'
1826 self.restore_file(d, data, path, l_no, dpath)
1827
1828 # now we restore parent_directory mtime
1829 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1830 return
1831
1832 # error code path is reached when:
1833 # a) we have more than two indexes (unsupported atm)
1834 # b) both indexes contain a list:// entry (logic error)
1835 # c) we have just one index and it also contains list://
4bda6f45 1836 self._deltatar.logger.warning(('Error restoring file %s from index, '
ea6d3c3e
ERE
1837 'snapshot not found in any index') % path)
1838
670f9934
ERE
1839 def find_path_in_index(self, data, upath):
1840 # NOTE: we restart the iterator sometimes because the iterator can be
1841        # walked over completely multiple times, for example if one path is not
1842 # found in one index and we have to go to the next index.
7273719c
PG
1843 it = data['iterator']
1844 if it is None:
670f9934 1845 it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
be60ffd0 1846 d, l_no = it.__next__()
670f9934 1847 else:
670f9934
ERE
1848 d = data['last_itelement']
1849 l_no = data['last_lno']
1850
670f9934 1851 while True:
7273719c 1852 dpath = self._deltatar.unprefixed(d.get('path', ''))
670f9934
ERE
1853 if upath == dpath:
1854 data['last_itelement'] = d
1855 data['last_lno'] = l_no
1856 return d, l_no, dpath
1857
1858 up, dp = self._deltatar.compare_indexes(upath, dpath)
1859            # if upath should already have appeared before the current dpath, it
1860            # means upath is just not in this index and we should stop
1861 if dp is None:
1862 data['last_itelement'] = d
1863 data['last_lno'] = l_no
1864 return None, 0, ''
1865
1866 try:
be60ffd0 1867 d, l_no = it.__next__()
670f9934
ERE
1868 except StopIteration:
1869 data['last_itelement'] = d
1870 data['last_lno'] = l_no
1871 return None, 0, ''
670f9934 1872
0501fe0a
ERE
1873 def restore_directories_permissions(self):
1874 '''
1875        Restore directory permissions once everything has been restored
1876 '''
42c04ead
ERE
1877 try:
1878 import grp, pwd
1879 except ImportError:
1880 grp = pwd = None
1881
0501fe0a
ERE
1882 self._directories.sort(key=operator.attrgetter('name'))
1883 self._directories.reverse()
0501fe0a
ERE
1884
1885 # Set correct owner, mtime and filemode on directories.
1886 for member in self._directories:
1887 dirpath = member.name
1888 try:
42c04ead
ERE
1889 os.chmod(dirpath, member.mode)
1890 os.utime(dirpath, (member.mtime, member.mtime))
253d4cdd 1891 if self.canchown:
42c04ead
ERE
1892 # We have to be root to do so.
1893 try:
1894 g = grp.getgrnam(member.gname)[2]
1895 except KeyError:
1896 g = member.gid
1897 try:
1898 u = pwd.getpwnam(member.uname)[2]
1899 except KeyError:
1900 u = member.uid
1901 try:
4e433e00 1902 if member.issym and hasattr(os, "lchown"):
42c04ead
ERE
1903 os.lchown(dirpath, u, g)
1904 else:
1905 os.chown(dirpath, u, g)
1906 except EnvironmentError:
1907 raise tarfile.ExtractError("could not change owner")
1908
be60ffd0 1909 except tarfile.ExtractError as e:
4bda6f45 1910 self._deltatar.logger.warning('tarfile: %s' % e)
0501fe0a 1911
df86af81 1912 @staticmethod
b7c47f38 1913 def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
ea6d3c3e
ERE
1914 '''
1915 Handles the new volumes
1916 '''
df86af81
ERE
1917 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1918 volume_number, guess_name=True)
ea6d3c3e
ERE
1919 volume_path = os.path.join(backup_path, volume_name)
1920
1921 # we convert relative paths into absolute because CWD is changed
1922 if not os.path.isabs(volume_path):
1923 volume_path = os.path.join(cwd, volume_path)
b7c47f38 1924 tarobj.open_volume(volume_path, encryption=encryption)
ea6d3c3e 1925
253d4cdd 1926 def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
ea6d3c3e
ERE
1927 '''
1928 Restores a snapshot of a file from a specific backup
1929 '''
ea6d3c3e 1930 op_type = file_data.get('type', -1)
24ddf0a2 1931 member = file_data.get('member', None)
9f9ae874 1932 ismember = bool(member)
24ddf0a2
ERE
1933
1934        # when member is set, we can assume everything is right and we just
1935        # have to restore the path
a2a37de7 1936 if member is None:
24ddf0a2
ERE
1937 vol_no = file_data.get('volume', -1)
1938 # sanity check
1939 if not isinstance(vol_no, int) or vol_no < 0:
4bda6f45 1940 self._deltatar.logger.warning('unrecognized type to be restored: '
24ddf0a2
ERE
1941 '%s, line %d' % (op_type, l_no))
1942
1943        # set up the volume that needs to be read. only needed when member is
1944 # not set
a2a37de7 1945 if index_data['curr_vol_no'] != vol_no:
24ddf0a2
ERE
1946 index_data['curr_vol_no'] = vol_no
1947 backup_path = os.path.dirname(index_data['path'])
1948 vol_name = self._deltatar.volume_name_func(backup_path,
1949 index_data['is_full'], vol_no, guess_name=True)
1950 vol_path = os.path.join(backup_path, vol_name)
1951 if index_data['vol_fd']:
1952 index_data['vol_fd'].close()
be60ffd0 1953 index_data['vol_fd'] = open(vol_path, 'rb')
24ddf0a2
ERE
1954
1955 # force reopen of the tarobj because of new volume
1956 if index_data['tarobj']:
1957 index_data['tarobj'].close()
1958 index_data['tarobj'] = None
1959
1960 # seek tarfile if needed
1961 offset = file_data.get('offset', -1)
ea6d3c3e 1962 if index_data['tarobj']:
c52fd26b 1963 if self._disaster == tarfile.TOLERANCE_RESCUE:
24ddf0a2
ERE
1964 # force a seek and reopen
1965 index_data['tarobj'].close()
1966 index_data['tarobj'] = None
c52fd26b
PG
1967 else:
1968 try:
1969 member = index_data['tarobj'].__iter__().__next__()
1970 except tarfile.DecryptionError:
1971 pass
1972 except tarfile.CompressionError:
1973 pass
1974
1975 if not member or member.path != file_data['path']:
1976 # force a seek and reopen
1977 index_data['tarobj'].close()
1978 index_data['tarobj'] = None
1979
24ddf0a2
ERE
1980
1981 # open the tarfile if needed
1982 if not index_data['tarobj']:
1983 index_data['vol_fd'].seek(offset)
1984 index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
1985 fileobj=index_data['vol_fd'],
1986 format=tarfile.GNU_FORMAT,
d1c38f40 1987 concat='#' in self._deltatar.mode,
d5e1d60f 1988 encryption=index_data["decryptor"],
253d4cdd 1989 new_volume_handler=index_data['new_volume_handler'],
044585c6 1990 save_to_members=False,
04f4c7ab 1991 tolerance=self._disaster)
24ddf0a2 1992
be60ffd0 1993 member = index_data['tarobj'].__iter__().__next__()
ea6d3c3e 1994
253d4cdd
ERE
1995 member.path = unprefixed_path
1996 member.name = unprefixed_path
0501fe0a
ERE
1997
1998 if op_type == 'directory':
253d4cdd 1999 self.add_member_dir(member)
0501fe0a 2000 member = copy.copy(member)
be60ffd0 2001 member.mode = 0o0700
0501fe0a 2002
df86af81
ERE
2003            # if it's an existing directory, we don't need to recreate it;
2004            # just set the right permissions and mtime
2005 if os.path.exists(member.path):
2006 return
2007
9f9ae874 2008 if not ismember:
24ddf0a2
ERE
2009 # set current volume number in tarobj, otherwise the extraction of the
2010 # file might fail when trying to extract a multivolume member
2011 index_data['tarobj'].volume_number = index_data['curr_vol_no']
86a6e741 2012
9b13f5c4
PG
2013 def ignore_symlink (member, *_args):
2014 self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
786addd6 2015
ea6d3c3e 2016 # finally, restore the file
9b13f5c4 2017 index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)
253d4cdd
ERE
2018
2019 def add_member_dir(self, member):
2020 '''
2021 Add member dir to be restored at the end
2022 '''
4e433e00 2023 if not self.canchown:
253d4cdd
ERE
2024 self._directories.append(DirItem(name=member.name, mode=member.mode,
2025 mtime=member.mtime))
2026 else:
2027 self._directories.append(DirItem(name=member.name, mode=member.mode,
2028 mtime=member.mtime, gname=member.gname, uname=member.uname,
4e433e00 2029 uid=member.uid, gid=member.gid, issym=member.issym()))
253d4cdd
ERE
2030
2031class DirItem(object):
2032 def __init__(self, **kwargs):
be60ffd0 2033 for k, v in kwargs.items():
9f9ae874 2034 setattr(self, k, v)