drop os.exists() before makedirs()
[python-delta-tar] / deltatar / deltatar.py
CommitLineData
6b2fa38f 1#!/usr/bin/env python3
0708a374 2
51797cd6 3# Copyright (C) 2013, 2014 Intra2net AG
0708a374
ERE
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; either version 3 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU Lesser General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see
17# <http://www.gnu.org/licenses/lgpl-3.0.html>
18
938c2d54
PG
19DELTATAR_HEADER_VERSION = 1
20DELTATAR_PARAMETER_VERSION = 1
3fdea6d4 21
0708a374
ERE
22import logging
23import datetime
6c678f3a 24import binascii
938c2d54 25import io
0501fe0a 26import operator
0708a374 27import os
0501fe0a 28import copy
82de3376 29import shutil
8a8fadda 30import re
e82f14f5
ERE
31import stat
32import json
0708a374
ERE
33from functools import partial
34
35from . import tarfile
2ae46844 36from . import crypto
0708a374 37
0708a374
ERE
38class NullHandler(logging.Handler):
39 def emit(self, record):
40 pass
24ddf0a2
ERE
41
42
0708a374
ERE
43logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
44
974408b5
ERE
45
46# match mode
47NO_MATCH = False
48MATCH = True
49PARENT_MATCH = 2
50
133d30da
PG
51# encryption direction
52CRYPTO_MODE_ENCRYPT = 0
53CRYPTO_MODE_DECRYPT = 1
54
13cc7dfc
PG
55# The canonical extension for encrypted backup files regardless of the actual
56# encryption parameters is “.pdtcrypt”. This is analogous to the encryption
57# header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
58# Since the introduction of the versioned header there no longer any need
59# for encoding encryption parameters in the file extensions (“.aes128” and
60# suchlike).
61PDTCRYPT_EXTENSION = "pdtcrypt"
2cdd9faf
PG
62PDT_TYPE_ARCHIVE = 0
63PDT_TYPE_AUX = 1
13cc7dfc 64
9eccb1c2
PG
65AUXILIARY_FILE_INDEX = 0
66AUXILIARY_FILE_INFO = 1
67
0708a374
ERE
68class DeltaTar(object):
69 '''
70 Backup class used to create backups
71 '''
72
73 # list of files to exclude in the backup creation or restore operation. It
74 # can contain python regular expressions.
75 excluded_files = []
76
77 # list of files to include in the backup creation or restore operation. It
78 # can contain python regular expressions. If empty, all files in the source
79 # path will be backed up (when creating a backup) or all the files in the
a83fa4ed 80 # backup will be restored (when restoring a backup), but if included_files
0708a374
ERE
81 # is set then only the files include in the list will be processed.
82 included_files = []
83
84 # custom filter of files to be backed up (or restored). Unused and unset
85 # by default. The function receives a file path and must return a boolean.
86 filter_func = None
87
da26094a
ERE
88 # mode in which the delta will be created (when creating a backup) or
89 # opened (when restoring). Accepts modes analog to the tarfile library.
90 mode = ""
0708a374
ERE
91
92 # used together with aes modes to encrypt and decrypt backups.
93 password = None
1f3fd7b0
PG
94 crypto_key = None
95 nacl = None
0708a374 96
dbee011c
PG
97 # parameter version to use when encrypting; note that this has no effect
98 # on decryption since the required settings are determined from the headers
54f909ca 99 crypto_version = DELTATAR_HEADER_VERSION
dbee011c
PG
100 crypto_paramversion = None
101
133d30da 102 # when encrypting or decrypting, these hold crypto handlers; created before
2ae46844 103 # establishing the Tarfile stream iff a password is supplied.
133d30da
PG
104 encryptor = None
105 decryptor = None
2ae46844 106
0708a374
ERE
107 # python logger object.
108 logger = None
109
3a7e1a50
ERE
110 # specifies the index mode in the same format as @param mode, but without
111 # the ':', '|' or '#' at the begining. It doesn't make sense to specify
2ae46844 112 # that the index is encrypted if no password is given in the constructor.
3a7e1a50 113 index_mode = None
0708a374
ERE
114
115 # current time for this backup. Used for file names and file creation checks
116 current_time = None
117
9eae9a1f
ERE
118 # extra data to included in the header of the index file when creating a
119 # backup
120 extra_data = dict()
121
0708a374
ERE
122 # valid tarfile modes and their corresponding default file extension
123 __file_extensions_dict = {
da26094a
ERE
124 '': '',
125 ':': '',
126 ':gz': '.gz',
127 ':bz2': '.bz2',
128 '|': '',
129 '|gz': '.gz',
130 '|bz2': '.bz2',
131 '#gz': '.gz',
6e99d23a
PG
132 '#gz.pdtcrypt': '.gz',
133 '#pdtcrypt': '',
d1c38f40 134 '#': '',
0708a374
ERE
135 }
136
3a7e1a50
ERE
137 # valid index modes and their corresponding default file extension
138 __index_extensions_dict = {
139 '': '',
140 'gz': '.gz',
141 'bz2': '.bz2',
6e99d23a
PG
142 'gz.pdtcrypt': '.gz',
143 'pdtcrypt': '',
3a7e1a50
ERE
144 }
145
8adbe50d
ERE
146 # valid path prefixes
147 __path_prefix_list = [
148 u'snapshot://',
149 u'list://',
150 u'delete://'
151 ]
152
0708a374 153 def __init__(self, excluded_files=[], included_files=[],
da26094a 154 filter_func=None, mode="", password=None,
1f3fd7b0 155 crypto_key=None, nacl=None,
54f909ca 156 crypto_version=DELTATAR_HEADER_VERSION,
dbee011c 157 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
3a7e1a50 158 logger=None, index_mode=None, index_name_func=None,
0708a374
ERE
159 volume_name_func=None):
160 '''
161 Constructor. Configures the diff engine.
162
163 Parameters:
164 - excluded_files: list of files to exclude in the backup creation or
165 restore operation. It can contain python regular expressions.
166
167 - included_files: list of files to include in the backup creation or
168 restore operation. It can contain python regular expressions. If
169 empty, all files in the source path will be backed up (when creating a
170 backup) or all the files in the backup will be restored (when
a83fa4ed 171 restoring a backup), but if included_files is set then only the files
0708a374
ERE
172 include in the list will be processed.
173
174 - filter_func: custom filter of files to be backed up (or restored).
175 Unused and unset by default. The function receives a file path and
176 must return a boolean.
177
178 - mode: mode in which the delta will be created (when creating a backup)
179 or opened (when restoring). Accepts the same modes as the tarfile
180 library. Valid modes are:
181
da26094a
ERE
182 '' open uncompressed
183 ':' open uncompressed
184 ':gz' open with gzip compression
185 ':bz2' open with bzip2 compression
186 '|' open an uncompressed stream of tar blocks
187 '|gz' open a gzip compressed stream of tar blocks
188 '|bz2' open a bzip2 compressed stream of tar blocks
189 '#gz' open a stream of gzip compressed tar blocks
0708a374 190
1f3fd7b0
PG
191 - crypto_key: used to encrypt and decrypt backups. Encryption will
192 be enabled automatically if a key is supplied. Requires a salt to be
193 passed as well.
194
195 - nacl: salt that was used to derive the encryption key for embedding
196 in the PDTCRYPT header. Not needed when decrypting and when
197 encrypting with password.
198
6e99d23a
PG
199 - password: used to encrypt and decrypt backups. Encryption will be
200 enabled automatically if a password is supplied.
0708a374 201
54f909ca
PG
202 - crypto_version: version of the format, determining the kind of PDT
203 object header.
204
dbee011c
PG
205 - crypto_paramversion: optionally request encryption conforming to
206 a specific parameter version. Defaults to the standard PDT value
207 which as of 2017 is the only one available.
208
0708a374
ERE
209 - logger: python logger object. Optional.
210
3a7e1a50 211 - index_mode: specifies the index mode in the same format as @param
6e99d23a
PG
212 mode, but without the ':', '|' or '#' at the begining. If encryption
213 is requested it will extend to the auxiliary (index, info) files as
214 well. This is an optional parameter that will automatically mimic
215 @param mode by default if not provided. Valid modes are:
3a7e1a50
ERE
216
217 '' open uncompressed
218 'gz' open with gzip compression
219 'bz2' open with bzip2 compression
0708a374
ERE
220
221 - index_name_func: function that sets a custom name for the index file.
2cc6e32b
PG
222 This function receives a flag to indicate whether the name will be
223 used for a full or diff backup. The backup path will be prepended to
224 its return value.
0708a374
ERE
225
226 - volume_name_func: function that defines the name of tar volumes. It
227 receives the backup_path, if it's a full backup and the volume number,
228 and must return the name for the corresponding volume name. Optional,
229 DeltaTar has default names for tar volumes.
230 '''
231
da26094a 232 if mode not in self.__file_extensions_dict:
8a54d5dd
PG
233 raise Exception('Unrecognized extension mode=[%s] requested for files'
234 % str(mode))
0708a374
ERE
235
236 self.excluded_files = excluded_files
237 self.included_files = included_files
238 self.filter_func = filter_func
239 self.logger = logging.getLogger('deltatar.DeltaTar')
240 if logger:
241 self.logger.addHandler(logger)
242 self.mode = mode
2ae46844 243
1f3fd7b0
PG
244 if crypto_key is not None:
245 self.crypto_key = crypto_key
246 self.nacl = nacl # encryption only
247
2ae46844
PG
248 if password is not None:
249 self.password = password
3a7e1a50 250
54f909ca
PG
251 if crypto_version is not None:
252 self.crypto_version = crypto_version
253
dbee011c
PG
254 if crypto_paramversion is not None:
255 self.crypto_paramversion = crypto_paramversion
256
3a7e1a50
ERE
257 # generate index_mode
258 if index_mode is None:
259 index_mode = ''
6e99d23a 260 if 'gz' in mode:
3a7e1a50
ERE
261 index_mode = "gz"
262 elif 'bz2' in mode:
263 index_mode = "bz2"
264 elif mode not in self.__index_extensions_dict:
8a54d5dd
PG
265 raise Exception('Unrecognized extension mode=[%s] requested for index'
266 % str(mode))
3a7e1a50
ERE
267
268 self.index_mode = index_mode
0708a374
ERE
269 self.current_time = datetime.datetime.now()
270
271 if index_name_func is not None:
272 self.index_name_func = index_name_func
273
274 if volume_name_func is not None:
275 self.volume_name_func = volume_name_func
276
e54cfec5 277 def pick_extension(self, kind, mode=None):
2cdd9faf
PG
278 """
279 Choose the extension depending on a) the kind of file given, b) the
280 processing mode, and c) the current encryption settings.
281 """
282 ret = ""
283 if kind == PDT_TYPE_ARCHIVE:
284 ret += ".tar"
e54cfec5
PG
285 if mode is None:
286 mode = self.__index_extensions_dict [self.index_mode]
2cdd9faf 287 ret += mode
a83fa4ed 288 if self.crypto_key is not None or self.password is not None:
2cdd9faf
PG
289 ret += "." + PDTCRYPT_EXTENSION
290 return ret
291
f0287fb7 292 def index_name_func(self, is_full): # pylint: disable=method-hidden
0708a374 293 '''
2cc6e32b
PG
294 Callback for setting a custom name for the index file. Depending on
295 whether *is_full* is set, it will create a suitable name for a full
296 or a diff backup.
0708a374
ERE
297 '''
298 prefix = "bfull" if is_full else "bdiff"
f7940c31 299 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf
PG
300 extension = self.pick_extension \
301 (PDT_TYPE_AUX,
302 self.__index_extensions_dict [self.index_mode])
0708a374 303
da26094a 304 return "%s-%s.index%s" % (prefix, date_str, extension)
0708a374 305
f0287fb7
CH
306 def volume_name_func(self, backup_path, # pylint: disable=method-hidden
307 is_full, volume_number,
308 guess_name=False):
0708a374
ERE
309 '''
310 function that defines the name of tar volumes. It receives the
311 backup_path, if it's a full backup and the volume number, and must return
312 the name for the corresponding volume name. Optional, DeltaTar has default
313 names for tar volumes.
df86af81
ERE
314
315 If guess_name is activated, the file is intended not to be created but
316 to be found, and thus the date will be guessed.
0708a374
ERE
317 '''
318 prefix = "bfull" if is_full else "bdiff"
2cdd9faf
PG
319 extension = self.pick_extension \
320 (PDT_TYPE_ARCHIVE,
321 self.__file_extensions_dict [self.mode])
0708a374 322
df86af81 323 if not guess_name:
f7940c31 324 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf 325 return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
df86af81
ERE
326 else:
327 prefix = prefix + "-"
90b75470 328 postfix = "-%03d%s" % (volume_number + 1, extension)
86a6e741
ERE
329 for f in os.listdir(backup_path):
330 if f.startswith(prefix) and f.endswith(postfix):
331 return f
df86af81
ERE
332 raise Exception("volume not found")
333
0708a374 334
974408b5 335 def filter_path(self, path, source_path="", is_dir=None):
8a8fadda
ERE
336 '''
337 Filters a path, given the source_path, using the filtering properties
338 set in the constructor.
339 The filtering order is:
340 1. included_files (if any)
341 2. excluded_files
342 3. filter_func (which must return whether the file is accepted or not)
343 '''
75059f3c 344
c1af2184 345 if len(source_path) > 0:
75059f3c
CH
346 # ensure that exactly one '/' at end of dir is also removed
347 source_path = source_path.rstrip(os.sep) + os.sep
8a8fadda
ERE
348 path = path[len(source_path):]
349
350 # 1. filter included_files
974408b5 351 match = MATCH
8a8fadda 352 if len(self.included_files) > 0:
974408b5 353 match = NO_MATCH
8a8fadda
ERE
354 for i in self.included_files:
355 # it can be either a regexp or a string
be60ffd0 356 if isinstance(i, str):
8a8fadda
ERE
357 # if the string matches, then continue
358 if i == path:
974408b5 359 match = MATCH
c1af2184 360 break
8a8fadda
ERE
361
362 # if the string ends with / it's a directory, and if the
7b07645e 363 # path is contained in it, it is included
c1af2184 364 if i.endswith('/') and path.startswith(i):
974408b5 365 match = MATCH
c1af2184 366 break
8a8fadda
ERE
367
368 # if the string doesn't end with /, add it and do the same
369 # check
c1af2184 370 elif path.startswith(i + '/'):
974408b5 371 match = MATCH
c1af2184 372 break
8a8fadda 373
974408b5
ERE
374 # check for PARENT_MATCH
375 if is_dir:
376 dir_path = path
377 if not dir_path.endswith('/'):
378 dir_path += '/'
379
380 if i.startswith(dir_path):
381 match = PARENT_MATCH
382
8a8fadda
ERE
383 # if it's a reg exp, then we just check if it matches
384 elif isinstance(i, re._pattern_type):
c1af2184 385 if i.match(path):
974408b5 386 match = MATCH
c1af2184 387 break
8a8fadda 388 else:
4bda6f45 389 self.logger.warning('Invalid pattern in included_files: %s' % str(i))
8a8fadda 390
974408b5
ERE
391 if match == NO_MATCH:
392 return NO_MATCH
c1af2184 393
974408b5
ERE
394 # when a directory is in PARENT_MATCH, it doesn't matter if it's
395 # excluded. It's subfiles will be excluded, but the directory itself
396 # won't
397 if match != PARENT_MATCH:
8a8fadda
ERE
398 for e in self.excluded_files:
399 # it can be either a regexp or a string
be60ffd0 400 if isinstance(e, str):
8a8fadda 401 # if the string matches, then exclude
c1af2184 402 if e == path:
974408b5 403 return NO_MATCH
8a8fadda
ERE
404
405 # if the string ends with / it's a directory, and if the
406 # path starts with the directory, then exclude
c1af2184 407 if e.endswith('/') and path.startswith(e):
974408b5 408 return NO_MATCH
8a8fadda
ERE
409
410 # if the string doesn't end with /, do the same check with
411 # the slash added
c1af2184 412 elif path.startswith(e + '/'):
974408b5 413 return NO_MATCH
8a8fadda
ERE
414
415 # if it's a reg exp, then we just check if it matches
c1af2184
ERE
416 elif isinstance(e, re._pattern_type):
417 if e.match(path):
974408b5 418 return NO_MATCH
8a8fadda 419 else:
4bda6f45 420 self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
8a8fadda
ERE
421
422 if self.filter_func:
423 return self.filter_func(path)
424
974408b5 425 return match
8a8fadda 426
283fbd5e 427 def _recursive_walk_dir(self, source_path, keep_base_dir=False):
0708a374
ERE
428 '''
429 Walk a directory recursively, yielding each file/directory
c059a221
PG
430
431 Returns the path of an entity. If ``keep_base_dir`` is set,
432 the path returned contains the prefix ``source_path``; otherwise it is
433 relative to the prefix.
0708a374
ERE
434 '''
435
283fbd5e 436 source_path = source_path.rstrip(os.sep)
0708a374 437
283fbd5e 438 if keep_base_dir:
adf7dac4 439 beginning_size = 0
283fbd5e
CH
440 else:
441 beginning_size = len(source_path) + 1 # +1 for os.sep
442
443 queue = [source_path]
444
d07c8065 445 while queue:
df86af81 446 cur_path = queue.pop(0)
0708a374 447
c059a221
PG
448 dfd = os.open (cur_path, os.O_DIRECTORY)
449 if dfd == -1: # it might have been removed in the meantime
d86735e4
ERE
450 continue
451
c059a221
PG
452 try:
453 for filename in sorted(os.listdir(dfd)):
454 child = os.path.join(cur_path, filename)
455 is_dir = os.path.isdir(child)
456 status = self.filter_path(child, source_path, is_dir)
457 if status == NO_MATCH:
458 continue
459 if not os.access(child, os.R_OK):
460 self.logger.warning('Error accessing possibly locked file %s' % child)
461 continue
462
463 if status == MATCH:
464 yield child[beginning_size:]
465
466 if is_dir and (status == MATCH or status == PARENT_MATCH):
467 queue.append(child)
468 finally:
469 os.close (dfd)
0708a374 470
e82f14f5
ERE
471 def _stat_dict(self, path):
472 '''
473 Returns a dict with the stat data used to compare files
474 '''
475 stinfo = os.stat(path)
476 mode = stinfo.st_mode
477
478 ptype = None
479 if stat.S_ISDIR(mode):
d07c8065 480 ptype = u'directory'
e82f14f5 481 elif stat.S_ISREG(mode):
d07c8065 482 ptype = u'file'
e82f14f5 483 elif stat.S_ISLNK(mode):
d07c8065 484 ptype = u'link'
e82f14f5
ERE
485
486 return {
d07c8065 487 u'type': ptype,
be60ffd0 488 u'path': path,
d07c8065 489 u'mode': mode,
0501fe0a
ERE
490 u'mtime': int(stinfo.st_mtime),
491 u'ctime': int(stinfo.st_ctime),
d07c8065
ERE
492 u'uid': stinfo.st_uid,
493 u'gid': stinfo.st_gid,
494 u'inode': stinfo.st_ino,
495 u'size': stinfo.st_size
e82f14f5
ERE
496 }
497
df99a044 498 def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
d07c8065
ERE
499 '''
500 Return if the dicts are equal in the stat keys
501 '''
fc8fdcbc 502 keys = [u'type', u'mode',u'size', u'mtime',
d041935c 503 # not restored: u'inode', u'ctime'
df99a044 504 ]
8adbe50d 505
fc8fdcbc 506 # only if user is root, then also check gid/uid. otherwise do not check it,
d041935c 507 # because tarfile can chown in case of being superuser only
50d70ca9
PG
508 #
509 # also, skip the check in rpmbuild since the sources end up with the
510 # uid:gid of the packager while the extracted files are 0:0.
511 if hasattr(os, "geteuid") and os.geteuid() == 0 \
512 and os.getenv ("RPMBUILD_OPTIONS") is None:
fc8fdcbc
ERE
513 keys.append('gid')
514 keys.append('uid')
515
ea6d3c3e 516 if (not d1 and d2 != None) or (d1 != None and not d2):
8adbe50d
ERE
517 return False
518
cbac9f0b
ERE
519 if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
520 return False
8adbe50d 521
fc8fdcbc
ERE
522 type = d1.get('type', '')
523
d07c8065 524 for key in keys:
fc8fdcbc
ERE
525 # size doesn't matter for directories
526 if type == 'directory' and key == 'size':
527 continue
d07c8065
ERE
528 if d1.get(key, -1) != d2.get(key, -2):
529 return False
530 return True
531
df99a044 532 def prefixed(self, path, listsnapshot_equal=False):
8adbe50d
ERE
533 '''
534 if a path is not prefixed, return it prefixed
535 '''
536 for prefix in self.__path_prefix_list:
537 if path.startswith(prefix):
df99a044
ERE
538 if listsnapshot_equal and prefix == u'list://':
539 return u'snapshot://' + path[len(prefix):]
8adbe50d
ERE
540 return path
541 return u'snapshot://' + path
542
543 def unprefixed(self, path):
544 '''
545 remove a path prefix if any
546 '''
547 for prefix in self.__path_prefix_list:
548 if path.startswith(prefix):
549 return path[len(prefix):]
550 return path
551
133d30da
PG
552
553 def initialize_encryption (self, mode):
554 password = self.password
1f3fd7b0
PG
555 key = self.crypto_key
556 nacl = self.nacl
133d30da 557
1f3fd7b0 558 if key is None and password is None:
133d30da
PG
559 return
560 if mode == CRYPTO_MODE_ENCRYPT:
1f3fd7b0
PG
561 return crypto.Encrypt (password=password,
562 key=key,
563 nacl=nacl,
54f909ca 564 version=self.crypto_version,
774ca538 565 paramversion=self.crypto_paramversion)
133d30da 566 if mode == CRYPTO_MODE_DECRYPT:
1f3fd7b0 567 return crypto.Decrypt (password=password, key=key)
133d30da
PG
568
569 raise Exception ("invalid encryption mode [%r]" % mode)
570
571
9eccb1c2 572 def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
3a7e1a50 573 '''
9eccb1c2
PG
574 Given the specified configuration, opens a file for reading or writing,
575 inheriting the encryption and compression settings from the backup.
576 Returns a file object ready to use.
3fdea6d4 577
c8c72fe1
PG
578 :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
579 respectively).
580 :type mode: str
774ca538
PG
581 :param kind: Role of the file, see AUXILIARY_FILE_* constants.
582 Both the info and the auxiliary file have a globally
583 unique, constant counter value.
3fdea6d4 584 :type kind: str
3a7e1a50 585 '''
3a7e1a50
ERE
586 if self.index_mode.startswith('gz'):
587 comptype = 'gz'
588 elif self.index_mode.startswith('bz2'):
589 comptype = 'bz2'
590 else:
591 comptype = 'tar'
592
133d30da 593 crypto_ctx = None
6de9444a 594 enccounter = None
133d30da 595 if mode == "w":
774ca538 596 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 597 elif mode == "r":
774ca538 598 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
133d30da 599
3031b7ae
PG
600 if crypto_ctx is not None:
601 if kind == AUXILIARY_FILE_INFO:
602 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
603 elif kind == AUXILIARY_FILE_INDEX:
604 enccounter = crypto.AES_GCM_IV_CNT_INDEX
605 else:
606 raise Exception ("invalid kind of aux file %r" % kind)
607
c8c72fe1 608 sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
3fdea6d4 609 bufsize=tarfile.RECORDSIZE, fileobj=None,
6de9444a 610 encryption=crypto_ctx, enccounter=enccounter)
c8c72fe1
PG
611
612 return sink
613
3a7e1a50 614
0708a374 615 def create_full_backup(self, source_path, backup_path,
d4a05db6 616 max_volume_size=None, extra_data=dict()):
0708a374
ERE
617 '''
618 Creates a full backup.
619
620 Parameters:
621 - source_path: source path to the directory to back up.
622 - backup_path: path where the back up will be stored. Backup path will
623 be created if not existent.
d5361dac
ERE
624 - max_volume_size: maximum volume size in megabytes. Used to split the
625 backup in volumes. Optional (won't split in volumes by default).
9eae9a1f
ERE
626 - extra_data: a json-serializable dictionary with information that you
627 want to be included in the header of the index file
0708a374
ERE
628 '''
629 # check input
be60ffd0 630 if not isinstance(source_path, str):
0708a374
ERE
631 raise Exception('Source path must be a string')
632
be60ffd0 633 if not isinstance(backup_path, str):
0708a374
ERE
634 raise Exception('Backup path must be a string')
635
636 if not os.path.exists(source_path) or not os.path.isdir(source_path):
637 raise Exception('Source path "%s" does not exist or is not a '\
638 'directory' % source_path)
639
d07c8065
ERE
640 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
641 max_volume_size < 1):
642 raise Exception('max_volume_size must be a positive integer')
d5361dac
ERE
643 if max_volume_size != None:
644 max_volume_size = max_volume_size*1024*1024
645
9eae9a1f
ERE
646 if not isinstance(extra_data, dict):
647 raise Exception('extra_data must be a dictionary')
648
649 try:
650 extra_data_str = json.dumps(extra_data)
651 except:
652 raise Exception('extra_data is not json-serializable')
653
0708a374
ERE
654 if not os.access(source_path, os.R_OK):
655 raise Exception('Source path "%s" is not readable' % source_path)
656
657 # try to create backup path if needed
37ab0f57 658 os.makedirs(backup_path, exist_ok=True)
0708a374
ERE
659
660 if not os.access(backup_path, os.W_OK):
661 raise Exception('Backup path "%s" is not writeable' % backup_path)
662
663 if source_path.endswith('/'):
664 source_path = source_path[:-1]
665
666 if backup_path.endswith('/'):
667 backup_path = backup_path[:-1]
668
669 # update current time
670 self.current_time = datetime.datetime.now()
671
672 if self.mode not in self.__file_extensions_dict:
673 raise Exception('Unrecognized extension')
674
2ae46844 675 # setup for encrypting payload
774ca538
PG
676 if self.encryptor is None:
677 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
2ae46844 678
0708a374 679 # some initialization
11684b1d 680 self.vol_no = 0
0708a374
ERE
681
682 # generate the first volume name
683 vol_name = self.volume_name_func(backup_path, True, 0)
684 tarfile_path = os.path.join(backup_path, vol_name)
685
774ca538
PG
686 # init index
687 index_name = self.index_name_func(True)
688 index_path = os.path.join(backup_path, index_name)
689 index_sink = self.open_auxiliary_file(index_path, 'w')
e82f14f5 690
d5361dac
ERE
691 cwd = os.getcwd()
692
b7c47f38 693 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
0708a374
ERE
694 '''
695 Handles the new volumes
696 '''
d5361dac
ERE
697 volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
698 volume_path = os.path.join(backup_path, volume_name)
11684b1d 699 deltarobj.vol_no = volume_number
d5361dac
ERE
700
701 # we convert relative paths into absolute because CWD is changed
702 if not os.path.isabs(volume_path):
703 volume_path = os.path.join(cwd, volume_path)
11684b1d 704
8e019196
ERE
705 if tarobj.fileobj is not None:
706 tarobj.fileobj.close()
707
b008f989
ERE
708 deltarobj.logger.debug("opening volume %s" % volume_path)
709
b7c47f38 710 tarobj.open_volume(volume_path, encryption=encryption)
d5361dac
ERE
711
712 # wraps some args from context into the handler
133d30da 713 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
0708a374 714
774ca538 715 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
6c678f3a 716
be60ffd0 717 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
6c678f3a 718 # calculate checksum and write into the stream
c2ffe2ec 719 crc = binascii.crc32(s) & 0xFFFFffff
774ca538 720 index_sink.write(s)
e82f14f5 721
0708a374
ERE
722 # start creating the tarfile
723 tarobj = tarfile.TarFile.open(tarfile_path,
da26094a 724 mode='w' + self.mode,
0708a374 725 format=tarfile.GNU_FORMAT,
d1c38f40 726 concat='#' in self.mode,
133d30da 727 encryption=self.encryptor,
0708a374 728 max_volume_size=max_volume_size,
ea625b04 729 new_volume_handler=new_volume_handler,
e2b59b34
ERE
730 save_to_members=False,
731 dereference=True)
e5c6ca04 732 os.chdir(source_path)
55b8686d
ERE
733
734 # for each file to be in the backup, do:
e82f14f5 735 for path in self._recursive_walk_dir('.'):
55b8686d 736 # calculate stat dict for current file
253d4cdd
ERE
737 statd = self._stat_dict(path)
738 statd['path'] = u'snapshot://' + statd['path']
739 statd['volume'] = self.vol_no
55b8686d
ERE
740
741 # backup file
3e9b81bb
PG
742
743 try: # backup file
744 tarobj.add(path, arcname = statd['path'], recursive=False)
745 except FileNotFoundError as exn:
746 # file vanished since the call to access(3) above
747 self.logger.warning ("object [%s] no longer available in "
748 "file system (error: %s); skipping"
749 % (path, str (exn)))
750 continue # prevent indexing
11684b1d 751
55b8686d 752 # retrieve file offset
253d4cdd 753 statd['offset'] = tarobj.get_last_member_offset()
b008f989 754 self.logger.debug("backup %s" % statd['path'])
6c678f3a 755
d041935c 756 # store the stat dict in the index
be60ffd0 757 s = bytes(json.dumps(statd) + '\n', 'UTF-8')
6c678f3a 758 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 759 index_sink.write(s)
e82f14f5 760
be60ffd0 761 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
6c678f3a 762 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 763 index_sink.write(s)
be60ffd0 764 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
774ca538
PG
765 index_sink.write(s)
766
e5c6ca04 767 os.chdir(cwd)
0708a374 768 tarobj.close()
c8c72fe1 769 index_sink.close (close_fileobj=True)
938c2d54 770
0708a374 771 def create_diff_backup(self, source_path, backup_path, previous_index_path,
d4a05db6 772 max_volume_size=None, extra_data=dict()):
0708a374
ERE
773 '''
774 Creates a backup.
775
776 Parameters:
777 - source_path: source path to the directory to back up.
778 - backup_path: path where the back up will be stored. Backup path will
779 be created if not existent.
780 - previous_index_path: index of the previous backup, needed to know
781 which files changed since then.
782 - max_volume_size: maximum volume size in megabytes (MB). Used to split
783 the backup in volumes. Optional (won't split in volumes by default).
3a7e1a50
ERE
784
785 NOTE: previous index is assumed to follow exactly the same format as
786 the index_mode setup in the constructor.
0708a374 787 '''
d07c8065 788 # check/sanitize input
be60ffd0 789 if not isinstance(source_path, str):
d07c8065
ERE
790 raise Exception('Source path must be a string')
791
be60ffd0 792 if not isinstance(backup_path, str):
d07c8065
ERE
793 raise Exception('Backup path must be a string')
794
795 if not os.path.exists(source_path) or not os.path.isdir(source_path):
796 raise Exception('Source path "%s" does not exist or is not a '\
797 'directory' % source_path)
798
9eae9a1f
ERE
799 if not isinstance(extra_data, dict):
800 raise Exception('extra_data must be a dictionary')
801
802 try:
803 extra_data_str = json.dumps(extra_data)
804 except:
805 raise Exception('extra_data is not json-serializable')
806
d07c8065
ERE
807 if not os.access(source_path, os.R_OK):
808 raise Exception('Source path "%s" is not readable' % source_path)
809
810 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
811 max_volume_size < 1):
812 raise Exception('max_volume_size must be a positive integer')
813 if max_volume_size != None:
814 max_volume_size = max_volume_size*1024*1024
815
be60ffd0 816 if not isinstance(previous_index_path, str):
d07c8065
ERE
817 raise Exception('previous_index_path must be A string')
818
819 if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
820 raise Exception('Index path "%s" does not exist or is not a '\
821 'file' % previous_index_path)
822
823 if not os.access(previous_index_path, os.R_OK):
824 raise Exception('Index path "%s" is not readable' % previous_index_path)
825
826 # try to create backup path if needed
37ab0f57 827 os.makedirs(backup_path, exist_ok=True)
d07c8065
ERE
828
829 if not os.access(backup_path, os.W_OK):
830 raise Exception('Backup path "%s" is not writeable' % backup_path)
831
832 if source_path.endswith('/'):
833 source_path = source_path[:-1]
834
835 if backup_path.endswith('/'):
836 backup_path = backup_path[:-1]
837
838 # update current time
839 self.current_time = datetime.datetime.now()
840
841 if self.mode not in self.__file_extensions_dict:
842 raise Exception('Unrecognized extension')
843
2ae46844 844 # setup for encrypting payload
774ca538
PG
845 if self.encryptor is None:
846 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 847
d07c8065
ERE
848 # some initialization
849 self.vol_no = 0
850
851 # generate the first volume name
df86af81
ERE
852 vol_name = self.volume_name_func(backup_path, is_full=False,
853 volume_number=0)
d07c8065
ERE
854 tarfile_path = os.path.join(backup_path, vol_name)
855
938c2d54 856 # init index
d07c8065
ERE
857 cwd = os.getcwd()
858
3031b7ae
PG
859 index_name = self.index_name_func(is_full=False)
860 index_path = os.path.join(backup_path, index_name)
861 index_sink = self.open_auxiliary_file(index_path, 'w')
862
d07c8065
ERE
863 def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
864 '''
865 Handles the new volumes
866 '''
df86af81
ERE
867 volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
868 volume_number=volume_number)
d07c8065
ERE
869 volume_path = os.path.join(backup_path, volume_name)
870 deltarobj.vol_no = volume_number
871
872 # we convert relative paths into absolute because CWD is changed
873 if not os.path.isabs(volume_path):
874 volume_path = os.path.join(cwd, volume_path)
875
f624ff3d 876 deltarobj.logger.debug("opening volume %s" % volume_path)
d07c8065
ERE
877 tarobj.open_volume(volume_path)
878
879 # wraps some args from context into the handler
880 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
881
3031b7ae 882 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
d07c8065 883
be60ffd0 884 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
d07c8065 885 # calculate checksum and write into the stream
c2ffe2ec 886 crc = binascii.crc32(s) & 0xFFFFffff
3031b7ae 887 index_sink.write(s)
d07c8065
ERE
888
889 # start creating the tarfile
890 tarobj = tarfile.TarFile.open(tarfile_path,
891 mode='w' + self.mode,
892 format=tarfile.GNU_FORMAT,
d1c38f40 893 concat='#' in self.mode,
133d30da 894 encryption=self.encryptor,
d07c8065 895 max_volume_size=max_volume_size,
ea625b04 896 new_volume_handler=new_volume_handler,
e2b59b34
ERE
897 save_to_members=False,
898 dereference=True)
d07c8065 899
aae127d0
ERE
900
901 # create the iterators, first the previous index iterator, then the
902 # source path directory iterator and collate and iterate them
903 if not os.path.isabs(previous_index_path):
904 previous_index_path = os.path.join(cwd, previous_index_path)
905 index_it = self.iterate_index_path(previous_index_path)
906
d07c8065 907 os.chdir(source_path)
aae127d0
ERE
908 dir_it = self._recursive_walk_dir('.')
909 dir_path_it = self.jsonize_path_iterator(dir_it)
d07c8065 910
df86af81
ERE
911 def pr(path):
912 if not path:
913 return "None"
914 else:
915 return path["path"]
8edb2e3c 916
d07c8065 917 # for each file to be in the backup, do:
df86af81 918 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
aae127d0
ERE
919 action = None
920 # if file is not in the index, it means it's a new file, so we have
921 # to take a snapshot
df86af81 922
aae127d0
ERE
923 if not ipath:
924 action = 'snapshot'
925 # if the file is not in the directory iterator, it means that it has
d041935c 926 # been deleted, so we need to mark it as such
aae127d0
ERE
927 elif not dpath:
928 action = 'delete'
929 # if the file is in both iterators, it means it might have either
930 # not changed (in which case we will just list it in our index but
931 # it will not be included in the tar file), or it might have
e8d95fe5 932 # changed, in which case we will snapshot it.
aae127d0
ERE
933 elif ipath and dpath:
934 if self._equal_stat_dicts(ipath, dpath):
935 action = 'list'
936 else:
937 action = 'snapshot'
938 # TODO: when creating chained backups (i.e. diffing from another
939 # diff), we will need to detect the type of action in the previous
940 # index, because if it was delete and dpath is None, we should
941 # discard the file
942
943 if action == 'snapshot':
944 # calculate stat dict for current file
945 stat = dpath.copy()
be60ffd0 946 stat['path'] = "snapshot://" + dpath['path']
aae127d0
ERE
947 stat['volume'] = self.vol_no
948
50f43227
ERE
949 self.logger.debug("[STORE] %s" % dpath['path'])
950
3e9b81bb
PG
951 try: # backup file
952 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
953 # retrieve file offset
954 stat['offset'] = tarobj.get_last_member_offset()
955 except FileNotFoundError as exn:
956 # file vanished since the call to access(3) above
957 self.logger.warning ("object [%s] no longer available in "
958 "file system (error: %s); skipping"
959 % (dpath ["path"], str (exn)))
960 stat = None # prevent indexing
aae127d0 961
aae127d0 962 elif action == 'delete':
50f43227 963 path = self.unprefixed(ipath['path'])
aae127d0 964 stat = {
50f43227 965 u'path': u'delete://' + path,
aae127d0
ERE
966 u'type': ipath['type']
967 }
50f43227 968 self.logger.debug("[DELETE] %s" % path)
aae127d0
ERE
969
970 # mark it as deleted in the backup
42d39ca7 971 tarobj.add("/dev/null", arcname=stat['path'])
aae127d0
ERE
972 elif action == 'list':
973 stat = dpath.copy()
50f43227
ERE
974 path = self.unprefixed(ipath['path'])
975 stat['path'] = u'list://' + path
aae127d0 976 # unchanged files do not enter in the backup, only in the index
50f43227 977 self.logger.debug("[UNCHANGED] %s" % path)
80910564
TJ
978 else:
979 # should not happen
4bda6f45 980 self.logger.warning('unknown action in create_diff_backup: {0}'
80910564
TJ
981 ''.format(action))
982 stat = None
aae127d0 983
80910564
TJ
984 if stat:
985 # store the stat dict in the index
be60ffd0 986 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
aae127d0 987 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 988 index_sink.write(s)
aae127d0 989
be60ffd0 990 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
aae127d0 991 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 992 index_sink.write(s)
be60ffd0 993 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
3031b7ae 994 index_sink.write(s)
938c2d54 995
df86af81 996 index_it.release()
aae127d0
ERE
997 os.chdir(cwd)
998 tarobj.close()
938c2d54
PG
999 index_sink.close()
1000
1001
d07c8065 1002 def iterate_index_path(self, index_path):
df86af81
ERE
1003 '''
1004 Returns an index iterator. Internally, it uses a classic iterator class.
1005 We do that instead of just yielding so that the iterator object can have
1006 an additional function to close the file descriptor that is opened in
1007 the constructor.
1008 '''
d07c8065 1009
df86af81
ERE
1010 class IndexPathIterator(object):
1011 def __init__(self, delta_tar, index_path):
1012 self.delta_tar = delta_tar
1013 self.index_path = index_path
1014 self.f = None
9eae9a1f 1015 self.extra_data = dict()
df86af81 1016 self.__enter__()
d07c8065 1017
df86af81
ERE
1018 def __iter__(self):
1019 return self
d07c8065 1020
df86af81
ERE
1021 def release(self):
1022 if self.f:
1023 self.f.close()
1024
1025 def __enter__(self):
1026 '''
1027 Allows this iterator to be used with the "with" statement
1028 '''
1029 if self.f is None:
9eccb1c2 1030 self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
df86af81
ERE
1031 # check index header
1032 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1033 if j.get("type", '') != 'python-delta-tar-index' or\
1034 j.get('version', -1) != 1:
1035 raise Exception("invalid index file format: %s" % json.dumps(j))
1036
9eae9a1f
ERE
1037 self.extra_data = j.get('extra_data', dict())
1038
df86af81
ERE
1039 # find BEGIN-FILE-LIST, ignore other headers
1040 while True:
1041 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1042 if j.get('type', '') == 'BEGIN-FILE-LIST':
1043 break
1044 return self
1045
1046 def __exit__(self, type, value, tb):
1047 '''
1048 Allows this iterator to be used with the "with" statement
1049 '''
ec57ce53
ERE
1050 if self.f:
1051 self.f.close()
df86af81 1052 self.f = None
d07c8065 1053
be60ffd0 1054 def __next__(self):
0349168a 1055 # read each file in the index and process it to do the restore
df86af81
ERE
1056 j = {}
1057 l_no = -1
1058 try:
1059 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
be60ffd0 1060 except Exception as e:
df86af81
ERE
1061 if self.f:
1062 self.f.close()
1063 raise e
d07c8065 1064
df86af81 1065 op_type = j.get('type', '')
d07c8065 1066
df86af81
ERE
1067 # when we detect the end of the list, break the loop
1068 if op_type == 'END-FILE-LIST':
1069 if self.f:
1070 self.f.close()
1071 raise StopIteration
1072
1073 # check input
1074 if op_type not in ['directory', 'file', 'link']:
4bda6f45 1075 self.delta_tar.logger.warning('unrecognized type to be '
df86af81
ERE
1076 'restored: %s, line %d' % (op_type, l_no))
1077 # iterate again
be60ffd0 1078 return self.__next__()
df86af81
ERE
1079
1080 return j, l_no
d07c8065 1081
df86af81 1082 return IndexPathIterator(self, index_path)
d07c8065 1083
26fdd428 1084 def iterate_tar_path(self, tar_path, new_volume_handler=None):
24ddf0a2
ERE
1085 '''
1086 Returns a tar iterator that iterates jsonized member items that contain
1087 an additional "member" field, used by RestoreHelper.
1088 '''
ec57ce53 1089 class TarPathIterator(object):
83a81852 1090 def __init__(self, delta_tar, tar_path, new_volume_handler=None):
24ddf0a2 1091 self.delta_tar = delta_tar
ec57ce53 1092 self.tar_path = tar_path
24ddf0a2 1093 self.tar_obj = None
6bca471c 1094 self.last_member = None
26fdd428 1095 self.new_volume_handler = new_volume_handler
24ddf0a2
ERE
1096 self.__enter__()
1097
1098 def __iter__(self):
1099 return self
1100
1101 def release(self):
1102 if self.tar_obj:
1103 self.tar_obj.close()
1104
1105 def __enter__(self):
1106 '''
1107 Allows this iterator to be used with the "with" statement
1108 '''
1109 if self.tar_obj is None:
d5e1d60f
PG
1110 decryptor = None
1111 if self.delta_tar.password is not None:
1f3fd7b0
PG
1112 decryptor = crypto.Decrypt \
1113 (password=self.delta_tar.password,
1114 key=self.delta_tar.crypto_key)
ec57ce53
ERE
1115 self.tar_obj = tarfile.TarFile.open(self.tar_path,
1116 mode='r' + self.delta_tar.mode,
1117 format=tarfile.GNU_FORMAT,
d1c38f40 1118 concat='#' in self.delta_tar.mode,
d5e1d60f 1119 encryption=decryptor,
83a81852 1120 new_volume_handler=self.new_volume_handler,
e2b59b34
ERE
1121 save_to_members=False,
1122 dereference=True)
24ddf0a2
ERE
1123 return self
1124
1125 def __exit__(self, type, value, tb):
1126 '''
1127 Allows this iterator to be used with the "with" statement
1128 '''
ec57ce53
ERE
1129 if self.tar_obj:
1130 self.tar_obj.close()
24ddf0a2
ERE
1131 self.tar_obj = None
1132
be60ffd0 1133 def __next__(self):
24ddf0a2
ERE
1134 '''
1135 Read each member and return it as a stat dict
1136 '''
be60ffd0 1137 tarinfo = self.tar_obj.__iter__().__next__()
8e019196
ERE
1138 # NOTE: here we compare if tarinfo.path is the same as before
1139 # instead of comparing the tarinfo object itself because the
1140 # object itself might change for multivol tarinfos
1141 if tarinfo is None or (self.last_member is not None and\
1142 self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
ec57ce53
ERE
1143 raise StopIteration
1144
6bca471c
ERE
1145 self.last_member = tarinfo
1146
24ddf0a2
ERE
1147 ptype = 'unknown'
1148 if tarinfo.isfile():
1149 ptype = 'file'
1150 elif tarinfo.isdir():
ab7e7465 1151 ptype = 'directory'
24ddf0a2
ERE
1152 elif tarinfo.islnk() or tarinfo.issym():
1153 ptype = 'link'
1154
1155 return {
1156 u'type': ptype,
1157 u'path': tarinfo.path,
1158 u'mode': tarinfo.mode,
1159 u'mtime': tarinfo.mtime,
1160 u'ctime': -1, # cannot restore
1161 u'uid': tarinfo.uid,
1162 u'gid': tarinfo.gid,
1163 u'inode': -1, # cannot restore
1164 u'size': tarinfo.size,
1165 u'member': tarinfo
ec57ce53
ERE
1166 }, 0
1167
26fdd428 1168 return TarPathIterator(self, tar_path, new_volume_handler)
24ddf0a2 1169
df99a044 1170 def jsonize_path_iterator(self, iter, strip=0):
d07c8065
ERE
1171 '''
1172 converts the yielded items of an iterator into json path lines.
df99a044
ERE
1173
1174 strip: Strip the smallest prefix containing num leading slashes from
1175 the file path.
d07c8065
ERE
1176 '''
1177 while True:
1178 try:
be60ffd0 1179 path = iter.__next__()
df99a044 1180 if strip == 0:
4ac6d333 1181 yield self._stat_dict(path), 0
df99a044
ERE
1182 else:
1183 st = self._stat_dict(path)
1184 st['path'] = "/".join(path.split("/")[strip:])
4ac6d333 1185 yield st, 0
d07c8065
ERE
1186 except StopIteration:
1187 break
1188
b84beea7
PG
1189 def iterate_disaster_index (self, index):
1190 """
1191 Mimick the behavior of the other object iterators, just with the inputs
1192 supplied directly as *index*.
1193 """
1194
1195 class RawIndexIterator(object):
65b35c42 1196 def __init__(self, delta_tar, index):
b84beea7
PG
1197 self.delta_tar = delta_tar
1198 self.index = index
1199 self.__enter__()
1200
1201 def __iter__(self):
1202 return self
1203
1204 def release(self):
65b35c42 1205 pass
b84beea7
PG
1206
1207 def __enter__(self):
1208 '''
1209 Allows this iterator to be used with the "with" statement
1210 '''
1211 self.iter = self.index.__iter__ ()
1212 return self
1213
1214 def __exit__(self, type, value, tb):
1215 '''
1216 Allows this iterator to be used with the "with" statement
1217 '''
1218
1219 def __next__(self):
1220 idxent = self.iter.__next__ ()
65b35c42 1221 return idxent, 0
b84beea7
PG
1222
1223 return RawIndexIterator(self, index)
1224
d07c8065
ERE
1225 def collate_iterators(self, it1, it2):
1226 '''
1227 Collate two iterators, so that it returns pairs of the items of each
1228 iterator (if the items are the same), or (None, elem2) or (elem1, None)
1229 when there's no match for the items in the other iterator.
1230
1231 It assumes that the items in both lists are ordered in the same way.
1232 '''
ea6d3c3e 1233 l_no = 0
d07c8065
ERE
1234 elem1, elem2 = None, None
1235 while True:
1236 if not elem1:
1237 try:
be60ffd0 1238 elem1, l_no = it1.__next__()
d07c8065
ERE
1239 except StopIteration:
1240 if elem2:
ea6d3c3e 1241 yield (None, elem2, l_no)
d07c8065 1242 for elem2 in it2:
ea6d3c3e
ERE
1243 if isinstance(elem2, tuple):
1244 elem2 = elem2[0]
1245 yield (None, elem2, l_no)
d07c8065 1246 break
d07c8065
ERE
1247 if not elem2:
1248 try:
be60ffd0 1249 elem2 = it2.__next__()
d07c8065
ERE
1250 if isinstance(elem2, tuple):
1251 elem2 = elem2[0]
1252 except StopIteration:
1253 if elem1:
ea6d3c3e 1254 yield (elem1, None, l_no)
df99a044 1255 for elem1, l_no in it1:
ea6d3c3e 1256 yield (elem1, None, l_no)
d07c8065 1257 break
670f9934
ERE
1258
1259 index1 = self.unprefixed(elem1['path'])
1260 index2 = self.unprefixed(elem2['path'])
1261 i1, i2 = self.compare_indexes(index1, index2)
1262
1263 yield1 = yield2 = None
1264 if i1 is not None:
1265 yield1 = elem1
1266 elem1 = None
1267 if i2 is not None:
1268 yield2 = elem2
1269 elem2 = None
1270 yield (yield1, yield2, l_no)
1271
1272 def compare_indexes(self, index1, index2):
1273 '''
1274 Compare iterator indexes and return a tuple in the following form:
1275 if index1 < index2, returns (index1, None)
1276 if index1 == index2 returns (index1, index2)
1277 else: returns (None, index2)
1278 '''
1279 l1 = index1.split('/')
1280 l2 = index2.split('/')
1281 length = len(l2) - len(l1)
1282
1283 if length > 0:
1284 return (index1, None)
1285 elif length < 0:
1286 return (None, index2)
1287
1288 for i1, i2 in zip(l1, l2):
1289 if i1 < i2:
1290 return (index1, None)
1291 elif i1 > i2:
1292 return (None, index2)
1293
1294 return (index1, index2)
0708a374 1295
8c65a2b1 1296 def list_backup(self, backup_tar_path, list_func=None):
be60ffd0 1297 if not isinstance(backup_tar_path, str):
8c65a2b1
ERE
1298 raise Exception('Backup tar path must be a string')
1299
1300 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1301 raise Exception('Source path "%s" does not exist or is not a '\
1302 'file' % backup_tar_path)
1303
1304 if not os.access(backup_tar_path, os.R_OK):
1305 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1306
1307 cwd = os.getcwd()
1308
b7c47f38 1309 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
8c65a2b1
ERE
1310 '''
1311 Handles the new volumes
1312 '''
1313 volume_name = deltarobj.volume_name_func(backup_path, True,
1314 volume_number, guess_name=True)
1315 volume_path = os.path.join(backup_path, volume_name)
1316
1317 # we convert relative paths into absolute because CWD is changed
1318 if not os.path.isabs(volume_path):
1319 volume_path = os.path.join(cwd, volume_path)
b7c47f38
PG
1320 tarobj.open_volume(volume_path, encryption=encryption)
1321
774ca538
PG
1322 if self.decryptor is None:
1323 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
8c65a2b1
ERE
1324
1325 backup_path = os.path.dirname(backup_tar_path)
1326 if not os.path.isabs(backup_path):
1327 backup_path = os.path.join(cwd, backup_path)
133d30da 1328 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)
b7a6566b 1329
8c65a2b1
ERE
1330 tarobj = tarfile.TarFile.open(backup_tar_path,
1331 mode='r' + self.mode,
1332 format=tarfile.GNU_FORMAT,
d1c38f40 1333 concat='#' in self.mode,
133d30da 1334 encryption=self.decryptor,
ea625b04 1335 new_volume_handler=new_volume_handler,
e2b59b34
ERE
1336 save_to_members=False,
1337 dereference=True)
8c65a2b1
ERE
1338
1339 def filter(cls, list_func, tarinfo):
1340 if list_func is None:
b008f989 1341 self.logger.info(tarinfo.path)
8c65a2b1
ERE
1342 else:
1343 list_func(tarinfo)
1344 return False
1345 filter = partial(filter, self, list_func)
1346
1347 tarobj.extractall(filter=filter)
1348 tarobj.close()
1349
0708a374 1350 def restore_backup(self, target_path, backup_indexes_paths=[],
e93f83f1 1351 backup_tar_path=None, restore_callback=None,
b84beea7 1352 disaster=tarfile.TOLERANCE_STRICT, backup_index=None):
0708a374
ERE
1353 '''
1354 Restores a backup.
1355
1356 Parameters:
0708a374
ERE
1357 - target_path: path to restore.
1358 - backup_indexes_paths: path to backup indexes, in descending date order.
1359 The indexes indicate the location of their respective backup volumes,
1360 and multiple indexes are needed to be able to restore diff backups.
1361 Note that this is an optional parameter: if not suplied, it will
1362 try to restore directly from backup_tar_path.
1363 - backup_tar_path: path to the backup tar file. Used as an alternative
1364 to backup_indexes_paths to restore directly from a tar file without
1365 using any file index. If it's a multivol tarfile, volume_name_func
1366 will be called.
4da27cfe 1367 - restore_callback: callback function to be called during restore.
b0aef801 1368 This is passed to the helper and gets called for every file.
11684b1d 1369
3a7e1a50 1370 NOTE: If you want to use an index to restore a backup, this function
11684b1d
ERE
1371 only supports to do so when the tarfile mode is either uncompressed or
1372 uses concat compress mode, because otherwise it would be very slow.
3a7e1a50
ERE
1373
1374 NOTE: Indices are assumed to follow the same format as the index_mode
1375 specified in the constructor.
e93f83f1
PG
1376
1377 Returns the list of files that could not be restored, if there were
1378 any.
0708a374 1379 '''
11684b1d 1380 # check/sanitize input
be60ffd0 1381 if not isinstance(target_path, str):
e5c6ca04
ERE
1382 raise Exception('Target path must be a string')
1383
11684b1d
ERE
1384 if backup_indexes_paths is None and backup_tar_path == []:
1385 raise Exception("You have to either provide index paths or a tar path")
e5c6ca04 1386
b84beea7
PG
1387 if isinstance (backup_index, list) is True:
1388 mode = "disaster"
1389 elif len(backup_indexes_paths) == 0:
ea6d3c3e
ERE
1390 mode = "tar"
1391 else:
1392 mode = "diff"
1393
1394 if mode == "tar":
be60ffd0 1395 if not isinstance(backup_tar_path, str):
11684b1d
ERE
1396 raise Exception('Backup tar path must be a string')
1397
1398 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1399 raise Exception('Source path "%s" does not exist or is not a '\
1400 'file' % backup_tar_path)
1401
1402 if not os.access(backup_tar_path, os.R_OK):
1403 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1404 else:
1405 if not isinstance(backup_indexes_paths, list):
1406 raise Exception('backup_indexes_paths must be a list')
1407
1408 if self.mode.startswith(':') or self.mode.startswith('|'):
1409 raise Exception('Restore only supports either uncompressed tars'
1410 ' or concat compression when restoring from an index, and '
1411 ' the open mode you provided is "%s"' % self.mode)
1412
1413 for index in backup_indexes_paths:
be60ffd0 1414 if not isinstance(index, str):
11684b1d 1415 raise Exception('indices must be strings')
e5c6ca04 1416
11684b1d
ERE
1417 if not os.path.exists(index) or not os.path.isfile(index):
1418 raise Exception('Index path "%s" does not exist or is not a '\
1419 'file' % index)
1420
1421 if not os.access(index, os.R_OK):
1422 raise Exception('Index path "%s" is not readable' % index)
e5c6ca04
ERE
1423
1424 # try to create backup path if needed
37ab0f57 1425 os.makedirs(target_path, exist_ok=True)
e5c6ca04 1426
ec57ce53
ERE
1427 # make backup_tar_path absolute so that iterate_tar_path works fine
1428 if backup_tar_path and not os.path.isabs(backup_tar_path):
1429 backup_tar_path = os.path.abspath(backup_tar_path)
1430
d5361dac 1431 cwd = os.getcwd()
ec57ce53 1432 os.chdir(target_path)
d5361dac 1433
2ae46844 1434 # setup for decrypting payload
774ca538
PG
1435 if self.decryptor is None:
1436 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
2ae46844 1437
ea6d3c3e 1438 if mode == 'tar':
24ddf0a2
ERE
1439 index_it = self.iterate_tar_path(backup_tar_path)
1440 helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
ec57ce53 1441 tarobj=index_it.tar_obj)
ea6d3c3e 1442 elif mode == "diff":
04f4c7ab
PG
1443 helper = RestoreHelper(self, cwd, backup_indexes_paths,
1444 disaster=disaster)
f3d10816
PG
1445 try:
1446 # get iterator from newest index at _data[0]
1447 index1 = helper._data[0]["path"]
1448 index_it = self.iterate_index_path(index1)
1449 except tarfile.DecryptionError as exn:
1450 self.logger.error("failed to decrypt file [%s]: %s; is this an "
afc87ebc
PG
1451 "actual encrypted index file?"
1452 % (index1, str (exn)))
1453 return [(index1, exn)]
1454 except Exception as exn:
1455 # compressed files
1456 self.logger.error("failed to read file [%s]: %s; is this an "
1457 "actual index file?" % (index1, str (exn)))
f3d10816 1458 return [(index1, exn)]
b84beea7
PG
1459 elif mode == "disaster":
1460 index_it = self.iterate_disaster_index (backup_index)
65b35c42
PG
1461 helper = RestoreHelper (self, cwd, backup_path=backup_tar_path,
1462 backup_index=backup_index,
1463 disaster=disaster)
b84beea7 1464
d07c8065 1465
24ddf0a2
ERE
1466 dir_it = self._recursive_walk_dir('.')
1467 dir_path_it = self.jsonize_path_iterator(dir_it)
11684b1d 1468
e93f83f1
PG
1469 failed = [] # irrecoverable files
1470
a395759e 1471 # for each file to be restored, do:
24ddf0a2
ERE
1472 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
1473 if not ipath:
1474 upath = dpath['path']
1475 op_type = dpath['type']
1476 else:
1477 upath = self.unprefixed(ipath['path'])
1478 op_type = ipath['type']
42c04ead 1479
24ddf0a2 1480 # filter paths
75059f3c 1481 if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
24ddf0a2 1482 continue
ea6d3c3e 1483
24ddf0a2
ERE
1484 # if types of the file mismatch, the file needs to be deleted
1485 # and re-restored
1486 if ipath is not None and dpath is not None and\
1487 dpath['type'] != ipath['type']:
1488 helper.delete(upath)
1489
1490 # if file not found in dpath, we can directly restore from index
1491 if not dpath:
1492 # if the file doesn't exist and it needs to be deleted, it
1493 # means that work is already done
1494 if ipath['path'].startswith('delete://'):
ea6d3c3e 1495 continue
24ddf0a2 1496 try:
b008f989 1497 self.logger.debug("restore %s" % ipath['path'])
4da27cfe 1498 helper.restore(ipath, l_no, restore_callback)
be60ffd0 1499 except Exception as e:
e93f83f1 1500 iipath = ipath.get ("path", "")
7b07645e 1501 self.logger.error("FAILED to restore: {} ({})"
e93f83f1 1502 .format(iipath, e))
04f4c7ab 1503 if disaster != tarfile.TOLERANCE_STRICT:
e93f83f1 1504 failed.append ((iipath, e))
24ddf0a2 1505 continue
11684b1d 1506
24ddf0a2
ERE
1507 # if both files are equal, we have nothing to restore
1508 if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
1509 continue
1510
1511 # we have to restore the file, but first we need to delete the
1512 # current existing file.
1513 # we don't delete the file if it's a directory, because it might
1514 # just have changed mtime, so it's quite inefficient to remove
1515 # it
1516 if ipath:
1517 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
42c04ead 1518 helper.delete(upath)
b008f989 1519 self.logger.debug("restore %s" % ipath['path'])
e93f83f1
PG
1520 try:
1521 helper.restore(ipath, l_no, restore_callback)
1522 except Exception as e:
04f4c7ab 1523 if disaster == tarfile.TOLERANCE_STRICT:
e93f83f1
PG
1524 raise
1525 failed.append ((ipath.get ("path", ""), e))
1526 continue
24ddf0a2
ERE
1527
1528 # if the file is not in the index (so it comes from the target
1529 # directory) then we have to delete it
1530 else:
c9d47a03 1531 self.logger.debug("delete %s" % upath)
24ddf0a2 1532 helper.delete(upath)
42c04ead 1533
ec57ce53
ERE
1534 helper.restore_directories_permissions()
1535 index_it.release()
1536 os.chdir(cwd)
1537 helper.cleanup()
ea6d3c3e 1538
e93f83f1
PG
1539 return failed
1540
1541
1542 def recover_backup(self, target_path, backup_indexes_paths=[],
1543 restore_callback=None):
1544 """
1545 Walk the index, extracting objects in disaster mode. Bad files are
1546 reported along with a reason.
1547 """
1548 return self.restore_backup(target_path,
1549 backup_indexes_paths=backup_indexes_paths,
04f4c7ab
PG
1550 disaster=tarfile.TOLERANCE_RECOVER)
1551
1552
6690f5e0 1553 def rescue_backup(self, target_path, backup_tar_path,
04f4c7ab
PG
1554 restore_callback=None):
1555 """
1556 More aggressive “unfsck” mode: do not rely on the index data as the
1557 files may be corrupt; skim files for header-like information and
1558 attempt to retrieve the data.
1559 """
27ee4dd4
PG
1560 def gen_volume_name (nvol):
1561 return os.path.join (os.path.dirname (backup_tar_path),
1562 self.volume_name_func (backup_tar_path,
1563 True,
1564 nvol))
1565
1566 backup_index = tarfile.gen_rescue_index (gen_volume_name,
1567 self.mode,
1568 password=self.password,
1569 key=self.crypto_key)
6690f5e0 1570
04f4c7ab 1571 return self.restore_backup(target_path,
b84beea7 1572 backup_index=backup_index,
65b35c42 1573 backup_tar_path=backup_tar_path,
04f4c7ab 1574 disaster=tarfile.TOLERANCE_RESCUE)
e93f83f1
PG
1575
1576
11684b1d
ERE
1577 def _parse_json_line(self, f, l_no):
1578 '''
ee0e095f 1579 Read line from file like object and process it as JSON.
11684b1d
ERE
1580 '''
1581 l = f.readline()
1582 l_no += 1
1583 try:
be60ffd0 1584 j = json.loads(l.decode('UTF-8'))
ee0e095f
PG
1585 except UnicodeDecodeError as e:
1586 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1587 raise Exception \
1588 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1589 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1590 from e
1591 raise Exception \
1592 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1593 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1594 from e
be60ffd0 1595 except ValueError as e:
11684b1d
ERE
1596 raise Exception("error parsing this json line "
1597 "(line number %d): %s" % (l_no, l))
1598 return j, l_no
ea6d3c3e 1599
24ddf0a2 1600
ea6d3c3e
ERE
1601class RestoreHelper(object):
1602 '''
1603 Class used to help to restore files from indices
1604 '''
1605
1606 # holds the dicts of data
1607 _data = []
1608
1609 _deltatar = None
1610
1611 _cwd = None
1612
0501fe0a
ERE
1613 # list of directories to be restored. This is done as a last step, see
1614 # tarfile.extractall for details.
1615 _directories = []
1616
04f4c7ab 1617 _disaster = tarfile.TOLERANCE_STRICT
e93f83f1 1618
037994ca 1619 def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
65b35c42
PG
1620 backup_index=None, tarobj=None,
1621 disaster=tarfile.TOLERANCE_STRICT):
ea6d3c3e
ERE
1622 '''
1623 Constructor opens the tars and init the data structures.
1624
037994ca
PG
1625 Assumptions:
1626
1627 - Index list must be provided in reverse order (newer first).
1628 - “newer first” apparently means that if there are n backups
1629 provided, the last full backup is at index n-1 and the most recent
1630 diff backup is at index 0.
1631 - Only the first, the second, and the last elements of
1632 ``index_list`` are relevant, others will not be accessed.
1633 - If no ``index_list`` is provided, both ``tarobj`` and
1634 ``backup_path`` must be passed.
1635 - If ``index_list`` is provided, the values of ``tarobj`` and
1636 ``backup_path`` are ignored.
ea6d3c3e
ERE
1637 '''
1638 self._data = []
0501fe0a 1639 self._directories = []
ea6d3c3e
ERE
1640 self._deltatar = deltatar
1641 self._cwd = cwd
3031b7ae 1642 self._password = deltatar.password
1f3fd7b0 1643 self._crypto_key = deltatar.crypto_key
3031b7ae 1644 self._decryptors = []
e93f83f1 1645 self._disaster = disaster
ea6d3c3e 1646
253d4cdd
ERE
1647 try:
1648 import grp, pwd
1649 except ImportError:
1650 grp = pwd = None
1651
1652 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1653 self.canchown = True
1654 else:
1655 self.canchown = False
1656
65b35c42 1657 if isinstance (backup_index, list) is True:
001bd488 1658 decryptor = self._deltatar.decryptor
65b35c42
PG
1659 self._data = \
1660 [{ "curr_vol_no" : None
1661 , "vol_fd" : None
1662 , "offset" : -1
1663 , "tarobj" : None
1664 , "path" : backup_path
1665 , "is_full" : True
1666 , "iterator" : None
1667 , "last_itelement" : None
1668 , "last_lno" : 0
001bd488
PG
1669 , "new_volume_handler" :
1670 partial(self.new_volume_handler,
1671 self._deltatar, self._cwd, True,
1672 os.path.dirname(backup_path), decryptor)
1673 , "decryptor" : decryptor
65b35c42
PG
1674 }]
1675 elif index_list is not None:
24ddf0a2 1676 for index in index_list:
037994ca 1677 is_full = index == index_list[-1]
24ddf0a2 1678
d5e1d60f 1679 decryptor = None
3031b7ae 1680 if self._password is not None:
1f3fd7b0
PG
1681 decryptor = crypto.Decrypt (password=self._password,
1682 key=self._crypto_key)
d5e1d60f 1683
24ddf0a2
ERE
1684 # make paths absolute to avoid cwd problems
1685 if not os.path.isabs(index):
1686 index = os.path.normpath(os.path.join(cwd, index))
1687
1688 s = dict(
1689 curr_vol_no = None,
1690 vol_fd = None,
1691 offset = -1,
1692 tarobj = None,
1693 path = index,
1694 is_full = is_full,
1695 iterator = None,
1696 last_itelement = None,
1697 last_lno = 0,
1698 new_volume_handler = partial(self.new_volume_handler,
1699 self._deltatar, self._cwd, is_full,
d5e1d60f
PG
1700 os.path.dirname(index), decryptor),
1701 decryptor = decryptor
24ddf0a2
ERE
1702 )
1703 self._data.append(s)
1704 else:
ea6d3c3e 1705 # make paths absolute to avoid cwd problems
24ddf0a2
ERE
1706 if not os.path.isabs(backup_path):
1707 backup_path = os.path.normpath(os.path.join(cwd, backup_path))
ea6d3c3e 1708
ec57ce53
ERE
1709 # update the new_volume_handler of tar_obj
1710 tarobj.new_volume_handler = partial(self.new_volume_handler,
b7c47f38 1711 self._deltatar, self._cwd, True, os.path.dirname(backup_path),
133d30da 1712 self._deltatar.decryptor)
ea6d3c3e
ERE
1713 s = dict(
1714 curr_vol_no = None,
1715 vol_fd = None,
1716 offset = -1,
24ddf0a2
ERE
1717 tarobj = tarobj,
1718 path = backup_path,
1719 is_full = True,
670f9934
ERE
1720 iterator = None,
1721 last_itelement = None,
1722 last_lno = 0,
d5e1d60f
PG
1723 new_volume_handler = tarobj.new_volume_handler,
1724 decryptor = self._deltatar.decryptor
ea6d3c3e
ERE
1725 )
1726 self._data.append(s)
1727
3031b7ae 1728
ea6d3c3e
ERE
1729 def cleanup(self):
1730 '''
1731 Closes all open files
1732 '''
1733 for data in self._data:
55b2ffd0
ERE
1734 if data['vol_fd']:
1735 data['vol_fd'].close()
1736 data['vol_fd'] = None
ea6d3c3e
ERE
1737 if data['tarobj']:
1738 data['tarobj'].close()
1739 data['tarobj'] = None
ea6d3c3e
ERE
1740
1741 def delete(self, path):
1742 '''
1743 Delete a file
1744 '''
df99a044
ERE
1745 if not os.path.exists(path):
1746 return
1747
24ddf0a2 1748 # to preserve parent directory mtime, we save it
283fbd5e 1749 parent_dir = os.path.dirname(path) or os.getcwd()
24ddf0a2
ERE
1750 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1751
561bc39f 1752 if os.path.isdir(path) and not os.path.islink(path):
ea6d3c3e
ERE
1753 shutil.rmtree(path)
1754 else:
1755 os.unlink(path)
1756
24ddf0a2
ERE
1757 # now we restore parent_directory mtime
1758 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1759
4da27cfe 1760 def restore(self, itpath, l_no, callback=None):
ea6d3c3e 1761 '''
8a54d5dd 1762 Restore the path from the appropriate backup. Receives the current path
e8d95fe5 1763 from the newest (=first) index iterator. itpath must be not null.
b0aef801 1764 callback is a custom function that gets called for every file.
037994ca
PG
1765
1766 NB: This function takes the attribute ``_data`` as input but will only
1767 ever use its first and, if available, second element. Anything else in
1768 ``._data[]`` will be ignored.
ea6d3c3e 1769 '''
ea6d3c3e
ERE
1770 path = itpath['path']
1771
4da27cfe
SA
1772 # Calls the callback function
1773 if callback:
1774 callback()
1775
ea6d3c3e 1776 if path.startswith('delete://'):
df86af81
ERE
1777 # the file has previously been deleted already in restore_backup in
1778 # all cases so we just need to finish
ea6d3c3e 1779 return
df86af81 1780
e8d95fe5 1781 # get data from newest index (_data[0])
df86af81
ERE
1782 data = self._data[0]
1783 upath = self._deltatar.unprefixed(path)
1784
24ddf0a2 1785 # to preserve parent directory mtime, we save it
283fbd5e 1786 parent_dir = os.path.dirname(upath) or os.getcwd()
37ab0f57 1787 os.makedirs(parent_dir, exist_ok=True)
24ddf0a2
ERE
1788 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1789
e8d95fe5 1790 # if path is found in the newest index as to be snapshotted, deal with it
df86af81
ERE
1791 # and finish
1792 if path.startswith('snapshot://'):
65b35c42 1793 self.restore_file(itpath, data, path, l_no, upath)
24ddf0a2
ERE
1794
1795 # now we restore parent_directory mtime
1796 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
ea6d3c3e
ERE
1797 return
1798
1799 # we go from index to index, finding the path in the index, then finding
1800 # the index with the most recent snapshot of the file being restored
e8d95fe5
TJ
1801 #
1802 # Right now we support diff backups, only. No incremental backups.
1803 # As a result _data[0] is always the diff backup index
1804 # and _data[1] the full backup index.
527670c4 1805 if len(self._data) == 2:
7273719c 1806 data = self._data[1]
527670c4
TJ
1807 d, l_no, dpath = self.find_path_in_index(data, upath)
1808 if not d:
1809 self._deltatar.logger.warning('Error restoring file %s from '
1810 'index, not found in index %s' % (path, data['path']))
1811 return
1812
1813 cur_path = d.get('path', '')
1814 if cur_path.startswith('delete://'):
1815 self._deltatar.logger.warning(('Strange thing happened, file '
1816 '%s was listed in first index but deleted by another '
1817 'one. Path was ignored and untouched.') % path)
1818 return
1819 elif cur_path.startswith('snapshot://'):
1820 # this code path is reached when the file is unchanged
1821 # in the newest index and therefore of type 'list://'
1822 self.restore_file(d, data, path, l_no, dpath)
1823
1824 # now we restore parent_directory mtime
1825 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1826 return
1827
1828 # error code path is reached when:
1829 # a) we have more than two indexes (unsupported atm)
1830 # b) both indexes contain a list:// entry (logic error)
1831 # c) we have just one index and it also contains list://
4bda6f45 1832 self._deltatar.logger.warning(('Error restoring file %s from index, '
ea6d3c3e
ERE
1833 'snapshot not found in any index') % path)
1834
670f9934
ERE
1835 def find_path_in_index(self, data, upath):
1836 # NOTE: we restart the iterator sometimes because the iterator can be
1837 # walked over completely multiple times, for example if one path if not
1838 # found in one index and we have to go to the next index.
7273719c
PG
1839 it = data['iterator']
1840 if it is None:
670f9934 1841 it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
be60ffd0 1842 d, l_no = it.__next__()
670f9934 1843 else:
670f9934
ERE
1844 d = data['last_itelement']
1845 l_no = data['last_lno']
1846
670f9934 1847 while True:
7273719c 1848 dpath = self._deltatar.unprefixed(d.get('path', ''))
670f9934
ERE
1849 if upath == dpath:
1850 data['last_itelement'] = d
1851 data['last_lno'] = l_no
1852 return d, l_no, dpath
1853
1854 up, dp = self._deltatar.compare_indexes(upath, dpath)
1855 # any time upath should have appeared before current dpath, it means
1856 # upath is just not in this index and we should stop
1857 if dp is None:
1858 data['last_itelement'] = d
1859 data['last_lno'] = l_no
1860 return None, 0, ''
1861
1862 try:
be60ffd0 1863 d, l_no = it.__next__()
670f9934
ERE
1864 except StopIteration:
1865 data['last_itelement'] = d
1866 data['last_lno'] = l_no
1867 return None, 0, ''
670f9934 1868
0501fe0a
ERE
1869 def restore_directories_permissions(self):
1870 '''
1871 Restore directory permissions when everything have been restored
1872 '''
42c04ead
ERE
1873 try:
1874 import grp, pwd
1875 except ImportError:
1876 grp = pwd = None
1877
0501fe0a
ERE
1878 self._directories.sort(key=operator.attrgetter('name'))
1879 self._directories.reverse()
0501fe0a
ERE
1880
1881 # Set correct owner, mtime and filemode on directories.
1882 for member in self._directories:
1883 dirpath = member.name
1884 try:
42c04ead
ERE
1885 os.chmod(dirpath, member.mode)
1886 os.utime(dirpath, (member.mtime, member.mtime))
253d4cdd 1887 if self.canchown:
42c04ead
ERE
1888 # We have to be root to do so.
1889 try:
1890 g = grp.getgrnam(member.gname)[2]
1891 except KeyError:
1892 g = member.gid
1893 try:
1894 u = pwd.getpwnam(member.uname)[2]
1895 except KeyError:
1896 u = member.uid
1897 try:
4e433e00 1898 if member.issym and hasattr(os, "lchown"):
42c04ead
ERE
1899 os.lchown(dirpath, u, g)
1900 else:
1901 os.chown(dirpath, u, g)
1902 except EnvironmentError:
1903 raise tarfile.ExtractError("could not change owner")
1904
be60ffd0 1905 except tarfile.ExtractError as e:
4bda6f45 1906 self._deltatar.logger.warning('tarfile: %s' % e)
0501fe0a 1907
df86af81 1908 @staticmethod
b7c47f38 1909 def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
ea6d3c3e
ERE
1910 '''
1911 Handles the new volumes
1912 '''
df86af81
ERE
1913 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1914 volume_number, guess_name=True)
ea6d3c3e
ERE
1915 volume_path = os.path.join(backup_path, volume_name)
1916
1917 # we convert relative paths into absolute because CWD is changed
1918 if not os.path.isabs(volume_path):
1919 volume_path = os.path.join(cwd, volume_path)
b7c47f38 1920 tarobj.open_volume(volume_path, encryption=encryption)
ea6d3c3e 1921
253d4cdd 1922 def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
ea6d3c3e
ERE
1923 '''
1924 Restores a snapshot of a file from a specific backup
1925 '''
ea6d3c3e 1926 op_type = file_data.get('type', -1)
24ddf0a2 1927 member = file_data.get('member', None)
9f9ae874 1928 ismember = bool(member)
24ddf0a2
ERE
1929
1930 # when member is set, then we can assume everything is right and we
1931 # just have to restore the path
a2a37de7 1932 if member is None:
24ddf0a2
ERE
1933 vol_no = file_data.get('volume', -1)
1934 # sanity check
1935 if not isinstance(vol_no, int) or vol_no < 0:
4bda6f45 1936 self._deltatar.logger.warning('unrecognized type to be restored: '
24ddf0a2
ERE
1937 '%s, line %d' % (op_type, l_no))
1938
1939 # setup the volume that needs to be read. only needed when member is
1940 # not set
a2a37de7 1941 if index_data['curr_vol_no'] != vol_no:
24ddf0a2
ERE
1942 index_data['curr_vol_no'] = vol_no
1943 backup_path = os.path.dirname(index_data['path'])
1944 vol_name = self._deltatar.volume_name_func(backup_path,
1945 index_data['is_full'], vol_no, guess_name=True)
1946 vol_path = os.path.join(backup_path, vol_name)
1947 if index_data['vol_fd']:
1948 index_data['vol_fd'].close()
be60ffd0 1949 index_data['vol_fd'] = open(vol_path, 'rb')
24ddf0a2
ERE
1950
1951 # force reopen of the tarobj because of new volume
1952 if index_data['tarobj']:
1953 index_data['tarobj'].close()
1954 index_data['tarobj'] = None
1955
1956 # seek tarfile if needed
1957 offset = file_data.get('offset', -1)
ea6d3c3e 1958 if index_data['tarobj']:
c52fd26b 1959 if self._disaster == tarfile.TOLERANCE_RESCUE:
24ddf0a2
ERE
1960 # force a seek and reopen
1961 index_data['tarobj'].close()
1962 index_data['tarobj'] = None
c52fd26b
PG
1963 else:
1964 try:
1965 member = index_data['tarobj'].__iter__().__next__()
1966 except tarfile.DecryptionError:
1967 pass
1968 except tarfile.CompressionError:
1969 pass
1970
1971 if not member or member.path != file_data['path']:
1972 # force a seek and reopen
1973 index_data['tarobj'].close()
1974 index_data['tarobj'] = None
1975
24ddf0a2
ERE
1976
1977 # open the tarfile if needed
1978 if not index_data['tarobj']:
1979 index_data['vol_fd'].seek(offset)
1980 index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
1981 fileobj=index_data['vol_fd'],
1982 format=tarfile.GNU_FORMAT,
d1c38f40 1983 concat='#' in self._deltatar.mode,
d5e1d60f 1984 encryption=index_data["decryptor"],
253d4cdd 1985 new_volume_handler=index_data['new_volume_handler'],
044585c6 1986 save_to_members=False,
04f4c7ab 1987 tolerance=self._disaster)
24ddf0a2 1988
be60ffd0 1989 member = index_data['tarobj'].__iter__().__next__()
ea6d3c3e 1990
253d4cdd
ERE
1991 member.path = unprefixed_path
1992 member.name = unprefixed_path
0501fe0a
ERE
1993
1994 if op_type == 'directory':
253d4cdd 1995 self.add_member_dir(member)
0501fe0a 1996 member = copy.copy(member)
be60ffd0 1997 member.mode = 0o0700
0501fe0a 1998
df86af81
ERE
1999 # if it's an existing directory, we then don't need to recreate it
2000 # just set the right permissions, mtime and that kind of stuff
2001 if os.path.exists(member.path):
2002 return
2003
9f9ae874 2004 if not ismember:
24ddf0a2
ERE
2005 # set current volume number in tarobj, otherwise the extraction of the
2006 # file might fail when trying to extract a multivolume member
2007 index_data['tarobj'].volume_number = index_data['curr_vol_no']
86a6e741 2008
9b13f5c4
PG
2009 def ignore_symlink (member, *_args):
2010 self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
786addd6 2011
ea6d3c3e 2012 # finally, restore the file
9b13f5c4 2013 index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)
253d4cdd
ERE
2014
2015 def add_member_dir(self, member):
2016 '''
2017 Add member dir to be restored at the end
2018 '''
4e433e00 2019 if not self.canchown:
253d4cdd
ERE
2020 self._directories.append(DirItem(name=member.name, mode=member.mode,
2021 mtime=member.mtime))
2022 else:
2023 self._directories.append(DirItem(name=member.name, mode=member.mode,
2024 mtime=member.mtime, gname=member.gname, uname=member.uname,
4e433e00 2025 uid=member.uid, gid=member.gid, issym=member.issym()))
253d4cdd
ERE
2026
2027class DirItem(object):
2028 def __init__(self, **kwargs):
be60ffd0 2029 for k, v in kwargs.items():
9f9ae874 2030 setattr(self, k, v)