use real new volume handler during rescue
[python-delta-tar] / deltatar / deltatar.py
6b2fa38f 1#!/usr/bin/env python3
0708a374 2
51797cd6 3# Copyright (C) 2013, 2014 Intra2net AG
0708a374
ERE
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; either version 3 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU Lesser General Public License for more details.
14#
15# You should have received a copy of the GNU Lesser General Public License
16# along with this program. If not, see
17# <http://www.gnu.org/licenses/lgpl-3.0.html>
18
19# Author: Eduardo Robles Elvira <edulix@wadobo.com>
20
938c2d54
PG
21DELTATAR_HEADER_VERSION = 1
22DELTATAR_PARAMETER_VERSION = 1
3fdea6d4 23
0708a374
ERE
24import logging
25import datetime
6c678f3a 26import binascii
938c2d54 27import io
0501fe0a 28import operator
0708a374 29import os
0501fe0a 30import copy
82de3376 31import shutil
8a8fadda 32import re
e82f14f5
ERE
33import stat
34import json
0708a374
ERE
35from functools import partial
36
37from . import tarfile
2ae46844 38from . import crypto
0708a374 39
0708a374
ERE
40class NullHandler(logging.Handler):
41 def emit(self, record):
42 pass
24ddf0a2
ERE
43
44
0708a374
ERE
45logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
46
974408b5
ERE
47
48# match mode
49NO_MATCH = False
50MATCH = True
51PARENT_MATCH = 2
52
133d30da
PG
53# encryption direction
54CRYPTO_MODE_ENCRYPT = 0
55CRYPTO_MODE_DECRYPT = 1
56
13cc7dfc
PG
57# The canonical extension for encrypted backup files regardless of the actual
58# encryption parameters is “.pdtcrypt”. This is analogous to the encryption
59# header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
60 # Since the introduction of the versioned header there is no longer any need
61# for encoding encryption parameters in the file extensions (“.aes128” and
62# suchlike).
63PDTCRYPT_EXTENSION = "pdtcrypt"
2cdd9faf
PG
64PDT_TYPE_ARCHIVE = 0
65PDT_TYPE_AUX = 1
13cc7dfc 66
9eccb1c2
PG
67AUXILIARY_FILE_INDEX = 0
68AUXILIARY_FILE_INFO = 1
69
0708a374
ERE
70class DeltaTar(object):
71 '''
72 Backup class used to create backups
73 '''
74
75 # list of files to exclude in the backup creation or restore operation. It
76 # can contain python regular expressions.
77 excluded_files = []
78
79 # list of files to include in the backup creation or restore operation. It
80 # can contain python regular expressions. If empty, all files in the source
81 # path will be backed up (when creating a backup) or all the files in the
a83fa4ed 82 # backup will be restored (when restoring a backup), but if included_files
0708a374
ERE
83 # is set then only the files included in the list will be processed.
84 included_files = []
85
86 # custom filter of files to be backed up (or restored). Unused and unset
87 # by default. The function receives a file path and must return a boolean.
88 filter_func = None
89
da26094a
ERE
90 # mode in which the delta will be created (when creating a backup) or
91 # opened (when restoring). Accepts modes analogous to the tarfile library.
92 mode = ""
0708a374
ERE
93
94 # used together with aes modes to encrypt and decrypt backups.
95 password = None
1f3fd7b0
PG
96 crypto_key = None
97 nacl = None
0708a374 98
dbee011c
PG
99 # parameter version to use when encrypting; note that this has no effect
100 # on decryption since the required settings are determined from the headers
54f909ca 101 crypto_version = DELTATAR_HEADER_VERSION
dbee011c
PG
102 crypto_paramversion = None
103
133d30da 104 # when encrypting or decrypting, these hold crypto handlers; created before
2ae46844 105 # establishing the Tarfile stream iff a password is supplied.
133d30da
PG
106 encryptor = None
107 decryptor = None
2ae46844 108
0708a374
ERE
109 # python logger object.
110 logger = None
111
3a7e1a50
ERE
112 # specifies the index mode in the same format as @param mode, but without
113 # the ':', '|' or '#' at the beginning. It doesn't make sense to specify
2ae46844 114 # that the index is encrypted if no password is given in the constructor.
3a7e1a50 115 index_mode = None
0708a374
ERE
116
117 # current time for this backup. Used for file names and file creation checks
118 current_time = None
119
9eae9a1f
ERE
120 # extra data to be included in the header of the index file when creating a
121 # backup
122 extra_data = dict()
123
0708a374
ERE
124 # valid tarfile modes and their corresponding default file extension
125 __file_extensions_dict = {
da26094a
ERE
126 '': '',
127 ':': '',
128 ':gz': '.gz',
129 ':bz2': '.bz2',
130 '|': '',
131 '|gz': '.gz',
132 '|bz2': '.bz2',
133 '#gz': '.gz',
6e99d23a
PG
134 '#gz.pdtcrypt': '.gz',
135 '#pdtcrypt': '',
d1c38f40 136 '#': '',
0708a374
ERE
137 }
138
3a7e1a50
ERE
139 # valid index modes and their corresponding default file extension
140 __index_extensions_dict = {
141 '': '',
142 'gz': '.gz',
143 'bz2': '.bz2',
6e99d23a
PG
144 'gz.pdtcrypt': '.gz',
145 'pdtcrypt': '',
3a7e1a50
ERE
146 }
147
8adbe50d
ERE
148 # valid path prefixes
149 __path_prefix_list = [
150 u'snapshot://',
151 u'list://',
152 u'delete://'
153 ]
154
0708a374 155 def __init__(self, excluded_files=[], included_files=[],
da26094a 156 filter_func=None, mode="", password=None,
1f3fd7b0 157 crypto_key=None, nacl=None,
54f909ca 158 crypto_version=DELTATAR_HEADER_VERSION,
dbee011c 159 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
3a7e1a50 160 logger=None, index_mode=None, index_name_func=None,
0708a374
ERE
161 volume_name_func=None):
162 '''
163 Constructor. Configures the diff engine.
164
165 Parameters:
166 - excluded_files: list of files to exclude in the backup creation or
167 restore operation. It can contain python regular expressions.
168
169 - included_files: list of files to include in the backup creation or
170 restore operation. It can contain python regular expressions. If
171 empty, all files in the source path will be backed up (when creating a
172 backup) or all the files in the backup will be restored (when
a83fa4ed 173 restoring a backup), but if included_files is set then only the files
0708a374
ERE
174 included in the list will be processed.
175
176 - filter_func: custom filter of files to be backed up (or restored).
177 Unused and unset by default. The function receives a file path and
178 must return a boolean.
179
180 - mode: mode in which the delta will be created (when creating a backup)
181 or opened (when restoring). Accepts the same modes as the tarfile
182 library. Valid modes are:
183
da26094a
ERE
184 '' open uncompressed
185 ':' open uncompressed
186 ':gz' open with gzip compression
187 ':bz2' open with bzip2 compression
188 '|' open an uncompressed stream of tar blocks
189 '|gz' open a gzip compressed stream of tar blocks
190 '|bz2' open a bzip2 compressed stream of tar blocks
191 '#gz' open a stream of gzip compressed tar blocks
0708a374 192
1f3fd7b0
PG
193 - crypto_key: used to encrypt and decrypt backups. Encryption will
194 be enabled automatically if a key is supplied. Requires a salt to be
195 passed as well.
196
197 - nacl: salt that was used to derive the encryption key, for embedding
198 in the PDTCRYPT header. Not needed when decrypting, nor when
199 encrypting with a password.
200
6e99d23a
PG
201 - password: used to encrypt and decrypt backups. Encryption will be
202 enabled automatically if a password is supplied.
0708a374 203
54f909ca
PG
204 - crypto_version: version of the format, determining the kind of PDT
205 object header.
206
dbee011c
PG
207 - crypto_paramversion: optionally request encryption conforming to
208 a specific parameter version. Defaults to the standard PDT value
209 which as of 2017 is the only one available.
210
0708a374
ERE
211 - logger: python logger object. Optional.
212
3a7e1a50 213 - index_mode: specifies the index mode in the same format as @param
6e99d23a
PG
214 mode, but without the ':', '|' or '#' at the beginning. If encryption
215 is requested it will extend to the auxiliary (index, info) files as
216 well. This is an optional parameter that will automatically mimic
217 @param mode by default if not provided. Valid modes are:
3a7e1a50
ERE
218
219 '' open uncompressed
220 'gz' open with gzip compression
221 'bz2' open with bzip2 compression
0708a374
ERE
222
223 - index_name_func: function that sets a custom name for the index file.
2cc6e32b
PG
224 This function receives a flag to indicate whether the name will be
225 used for a full or diff backup. The backup path will be prepended to
226 its return value.
0708a374
ERE
227
228 - volume_name_func: function that defines the name of tar volumes. It
229 receives the backup_path, whether it is a full backup, and the volume
230 number, and must return the name of the corresponding volume. Optional,
231 DeltaTar has default names for tar volumes.
232 '''
233
da26094a 234 if mode not in self.__file_extensions_dict:
8a54d5dd
PG
235 raise Exception('Unrecognized extension mode=[%s] requested for files'
236 % str(mode))
0708a374
ERE
237
238 self.excluded_files = excluded_files
239 self.included_files = included_files
240 self.filter_func = filter_func
241 self.logger = logging.getLogger('deltatar.DeltaTar')
242 if logger:
243 self.logger.addHandler(logger)
244 self.mode = mode
2ae46844 245
1f3fd7b0
PG
246 if crypto_key is not None:
247 self.crypto_key = crypto_key
248 self.nacl = nacl # encryption only
249
2ae46844
PG
250 if password is not None:
251 self.password = password
3a7e1a50 252
54f909ca
PG
253 if crypto_version is not None:
254 self.crypto_version = crypto_version
255
dbee011c
PG
256 if crypto_paramversion is not None:
257 self.crypto_paramversion = crypto_paramversion
258
3a7e1a50
ERE
259 # generate index_mode
260 if index_mode is None:
261 index_mode = ''
6e99d23a 262 if 'gz' in mode:
3a7e1a50
ERE
263 index_mode = "gz"
264 elif 'bz2' in mode:
265 index_mode = "bz2"
266 elif mode not in self.__index_extensions_dict:
8a54d5dd
PG
267 raise Exception('Unrecognized extension mode=[%s] requested for index'
268 % str(mode))
3a7e1a50
ERE
269
270 self.index_mode = index_mode
0708a374
ERE
271 self.current_time = datetime.datetime.now()
272
273 if index_name_func is not None:
274 self.index_name_func = index_name_func
275
276 if volume_name_func is not None:
277 self.volume_name_func = volume_name_func
278
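# Illustrative constructor usage (a sketch, not part of the module itself;
# the password below is hypothetical):
#
#     from deltatar.deltatar import DeltaTar
#
#     # plain, uncompressed backup with default naming
#     dtar = DeltaTar(mode='')
#
#     # gzip concat-compressed volumes with encrypted payload
#     dtar = DeltaTar(mode='#gz', password='example-password',
#                     excluded_files=['tmp/'])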
e54cfec5 279 def pick_extension(self, kind, mode=None):
2cdd9faf
PG
280 """
281 Choose the extension depending on a) the kind of file given, b) the
282 processing mode, and c) the current encryption settings.
283 """
284 ret = ""
285 if kind == PDT_TYPE_ARCHIVE:
286 ret += ".tar"
e54cfec5
PG
287 if mode is None:
288 mode = self.__index_extensions_dict [self.index_mode]
2cdd9faf 289 ret += mode
a83fa4ed 290 if self.crypto_key is not None or self.password is not None:
2cdd9faf
PG
291 ret += "." + PDTCRYPT_EXTENSION
292 return ret
293
f0287fb7 294 def index_name_func(self, is_full): # pylint: disable=method-hidden
0708a374 295 '''
2cc6e32b
PG
296 Callback for setting a custom name for the index file. Depending on
297 whether *is_full* is set, it will create a suitable name for a full
298 or a diff backup.
0708a374
ERE
299 '''
300 prefix = "bfull" if is_full else "bdiff"
f7940c31 301 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf
PG
302 extension = self.pick_extension \
303 (PDT_TYPE_AUX,
304 self.__index_extensions_dict [self.index_mode])
0708a374 305
da26094a 306 return "%s-%s.index%s" % (prefix, date_str, extension)
0708a374 307
f0287fb7
CH
308 def volume_name_func(self, backup_path, # pylint: disable=method-hidden
309 is_full, volume_number,
310 guess_name=False):
0708a374
ERE
311 '''
312 function that defines the name of tar volumes. It receives the
313 backup_path, whether it is a full backup, and the volume number, and must
314 return the name of the corresponding volume. Optional, DeltaTar has default
315 names for tar volumes.
df86af81
ERE
316
317 If guess_name is activated, the file is meant to be located rather than
318 created, so the date part of the name is guessed from the existing files.
0708a374
ERE
319 '''
320 prefix = "bfull" if is_full else "bdiff"
2cdd9faf
PG
321 extension = self.pick_extension \
322 (PDT_TYPE_ARCHIVE,
323 self.__file_extensions_dict [self.mode])
0708a374 324
df86af81 325 if not guess_name:
f7940c31 326 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf 327 return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
df86af81
ERE
328 else:
329 prefix = prefix + "-"
90b75470 330 postfix = "-%03d%s" % (volume_number + 1, extension)
86a6e741
ERE
331 for f in os.listdir(backup_path):
332 if f.startswith(prefix) and f.endswith(postfix):
333 return f
df86af81
ERE
334 raise Exception("volume not found")
335
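# Naming sketch (the timestamp is made up): with mode='#gz' and no
# encryption, index_name_func(True) returns something like
# "bfull-2014-02-05-1610.index.gz" and volume_name_func(path, True, 0)
# returns "bfull-2014-02-05-1610-001.tar.gz"; if a password or crypto_key
# is set, pick_extension() appends ".pdtcrypt" to both names.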
0708a374 336
974408b5 337 def filter_path(self, path, source_path="", is_dir=None):
8a8fadda
ERE
338 '''
339 Filters a path, given the source_path, using the filtering properties
340 set in the constructor.
341 The filtering order is:
342 1. included_files (if any)
343 2. excluded_files
344 3. filter_func (which must return whether the file is accepted or not)
345 '''
75059f3c 346
c1af2184 347 if len(source_path) > 0:
75059f3c
CH
348 # ensure that exactly one '/' at end of dir is also removed
349 source_path = source_path.rstrip(os.sep) + os.sep
8a8fadda
ERE
350 path = path[len(source_path):]
351
352 # 1. filter included_files
974408b5 353 match = MATCH
8a8fadda 354 if len(self.included_files) > 0:
974408b5 355 match = NO_MATCH
8a8fadda
ERE
356 for i in self.included_files:
357 # it can be either a regexp or a string
be60ffd0 358 if isinstance(i, str):
8a8fadda
ERE
359 # if the string matches, then continue
360 if i == path:
974408b5 361 match = MATCH
c1af2184 362 break
8a8fadda
ERE
363
364 # if the string ends with / it's a directory, and if the
7b07645e 365 # path is contained in it, it is included
c1af2184 366 if i.endswith('/') and path.startswith(i):
974408b5 367 match = MATCH
c1af2184 368 break
8a8fadda
ERE
369
370 # if the string doesn't end with /, add it and do the same
371 # check
c1af2184 372 elif path.startswith(i + '/'):
974408b5 373 match = MATCH
c1af2184 374 break
8a8fadda 375
974408b5
ERE
376 # check for PARENT_MATCH
377 if is_dir:
378 dir_path = path
379 if not dir_path.endswith('/'):
380 dir_path += '/'
381
382 if i.startswith(dir_path):
383 match = PARENT_MATCH
384
8a8fadda
ERE
385 # if it's a reg exp, then we just check if it matches
386 elif isinstance(i, re._pattern_type):
c1af2184 387 if i.match(path):
974408b5 388 match = MATCH
c1af2184 389 break
8a8fadda 390 else:
4bda6f45 391 self.logger.warning('Invalid pattern in included_files: %s' % str(i))
8a8fadda 392
974408b5
ERE
393 if match == NO_MATCH:
394 return NO_MATCH
c1af2184 395
974408b5
ERE
396 # when a directory is in PARENT_MATCH, it doesn't matter if it's
397 # excluded. Its contents will be excluded, but the directory itself
398 # won't be
399 if match != PARENT_MATCH:
8a8fadda
ERE
400 for e in self.excluded_files:
401 # it can be either a regexp or a string
be60ffd0 402 if isinstance(e, str):
8a8fadda 403 # if the string matches, then exclude
c1af2184 404 if e == path:
974408b5 405 return NO_MATCH
8a8fadda
ERE
406
407 # if the string ends with / it's a directory, and if the
408 # path starts with the directory, then exclude
c1af2184 409 if e.endswith('/') and path.startswith(e):
974408b5 410 return NO_MATCH
8a8fadda
ERE
411
412 # if the string doesn't end with /, do the same check with
413 # the slash added
c1af2184 414 elif path.startswith(e + '/'):
974408b5 415 return NO_MATCH
8a8fadda
ERE
416
417 # if it's a reg exp, then we just check if it matches
c1af2184
ERE
418 elif isinstance(e, re._pattern_type):
419 if e.match(path):
974408b5 420 return NO_MATCH
8a8fadda 421 else:
4bda6f45 422 self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
8a8fadda
ERE
423
424 if self.filter_func:
425 return self.filter_func(path)
426
974408b5 427 return match
8a8fadda 428
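# Worked example of the filter order above (hypothetical patterns; the
# second argument is the source_path prefix to strip):
#
#     dtar = DeltaTar(mode='', included_files=['usr/'],
#                     excluded_files=[re.compile('.*\\.cache$')])
#     dtar.filter_path('usr/bin/ls', '')        # MATCH
#     dtar.filter_path('usr/tmp.cache', '')     # NO_MATCH (excluded)
#     dtar.filter_path('usr', '', is_dir=True)  # PARENT_MATCH
#     dtar.filter_path('var/log/syslog', '')    # NO_MATCH (not included)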
283fbd5e 429 def _recursive_walk_dir(self, source_path, keep_base_dir=False):
0708a374
ERE
430 '''
431 Walk a directory recursively, yielding each file/directory
0708a374
ERE
432 '''
433
283fbd5e 434 source_path = source_path.rstrip(os.sep)
0708a374 435
283fbd5e 436 if keep_base_dir:
adf7dac4 437 beginning_size = 0
283fbd5e
CH
438 else:
439 beginning_size = len(source_path) + 1 # +1 for os.sep
440
441 queue = [source_path]
442
d07c8065 443 while queue:
df86af81 444 cur_path = queue.pop(0)
0708a374 445
d86735e4
ERE
446 # it might have been removed in the mean time
447 if not os.path.exists(cur_path):
448 continue
449
7dec665c
CH
450 for filename in sorted(os.listdir(cur_path)):
451 child = os.path.join(cur_path, filename)
d07c8065
ERE
452 is_dir = os.path.isdir(child)
453 status = self.filter_path(child, source_path, is_dir)
7dec665c
CH
454 if status == NO_MATCH:
455 continue
456 if not os.access(child, os.R_OK):
4bda6f45 457 self.logger.warning('Error accessing possibly locked file %s' % child)
7dec665c 458 continue
8a8fadda 459
d07c8065 460 if status == MATCH:
adf7dac4 461 yield child[beginning_size:]
0708a374 462
d07c8065
ERE
463 if is_dir and (status == MATCH or status == PARENT_MATCH):
464 queue.append(child)
0708a374 465
e82f14f5
ERE
466 def _stat_dict(self, path):
467 '''
468 Returns a dict with the stat data used to compare files
469 '''
470 stinfo = os.stat(path)
471 mode = stinfo.st_mode
472
473 ptype = None
474 if stat.S_ISDIR(mode):
d07c8065 475 ptype = u'directory'
e82f14f5 476 elif stat.S_ISREG(mode):
d07c8065 477 ptype = u'file'
e82f14f5 478 elif stat.S_ISLNK(mode):
d07c8065 479 ptype = u'link'
e82f14f5
ERE
480
481 return {
d07c8065 482 u'type': ptype,
be60ffd0 483 u'path': path,
d07c8065 484 u'mode': mode,
0501fe0a
ERE
485 u'mtime': int(stinfo.st_mtime),
486 u'ctime': int(stinfo.st_ctime),
d07c8065
ERE
487 u'uid': stinfo.st_uid,
488 u'gid': stinfo.st_gid,
489 u'inode': stinfo.st_ino,
490 u'size': stinfo.st_size
e82f14f5
ERE
491 }
492
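# One entry produced by _stat_dict() (values are illustrative); this is the
# shape of a file record before create_full_backup() rewrites "path" with the
# "snapshot://" prefix and adds the "volume" and "offset" fields:
#
#     {"type": "file", "path": "usr/bin/ls", "mode": 33261,
#      "mtime": 1391601163, "ctime": 1391601163, "uid": 0, "gid": 0,
#      "inode": 131077, "size": 110080}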
df99a044 493 def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
d07c8065
ERE
494 '''
495 Return whether the dicts are equal in the stat keys
496 '''
fc8fdcbc 497 keys = [u'type', u'mode',u'size', u'mtime',
d041935c 498 # not restored: u'inode', u'ctime'
df99a044 499 ]
8adbe50d 500
fc8fdcbc 501 # only check gid/uid as well when the user is root; otherwise skip the check,
d041935c 502 # because tarfile can only chown when running as superuser
50d70ca9
PG
503 #
504 # also, skip the check in rpmbuild since the sources end up with the
505 # uid:gid of the packager while the extracted files are 0:0.
506 if hasattr(os, "geteuid") and os.geteuid() == 0 \
507 and os.getenv ("RPMBUILD_OPTIONS") is None:
fc8fdcbc
ERE
508 keys.append('gid')
509 keys.append('uid')
510
ea6d3c3e 511 if (not d1 and d2 != None) or (d1 != None and not d2):
8adbe50d
ERE
512 return False
513
cbac9f0b
ERE
514 if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
515 return False
8adbe50d 516
fc8fdcbc
ERE
517 type = d1.get('type', '')
518
d07c8065 519 for key in keys:
fc8fdcbc
ERE
520 # size doesn't matter for directories
521 if type == 'directory' and key == 'size':
522 continue
d07c8065
ERE
523 if d1.get(key, -1) != d2.get(key, -2):
524 return False
525 return True
526
df99a044 527 def prefixed(self, path, listsnapshot_equal=False):
8adbe50d
ERE
528 '''
529 if a path is not prefixed, return it prefixed
530 '''
531 for prefix in self.__path_prefix_list:
532 if path.startswith(prefix):
df99a044
ERE
533 if listsnapshot_equal and prefix == u'list://':
534 return u'snapshot://' + path[len(prefix):]
8adbe50d
ERE
535 return path
536 return u'snapshot://' + path
537
538 def unprefixed(self, path):
539 '''
540 remove a path prefix if any
541 '''
542 for prefix in self.__path_prefix_list:
543 if path.startswith(prefix):
544 return path[len(prefix):]
545 return path
546
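# prefixed()/unprefixed() behaviour in short:
#
#     self.prefixed('etc/passwd')            -> 'snapshot://etc/passwd'
#     self.prefixed('list://etc/passwd')     -> 'list://etc/passwd'
#     self.prefixed('list://etc/passwd', listsnapshot_equal=True)
#                                            -> 'snapshot://etc/passwd'
#     self.unprefixed('delete://etc/passwd') -> 'etc/passwd'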
133d30da
PG
547
548 def initialize_encryption (self, mode):
549 password = self.password
1f3fd7b0
PG
550 key = self.crypto_key
551 nacl = self.nacl
133d30da 552
1f3fd7b0 553 if key is None and password is None:
133d30da
PG
554 return
555 if mode == CRYPTO_MODE_ENCRYPT:
1f3fd7b0
PG
556 return crypto.Encrypt (password=password,
557 key=key,
558 nacl=nacl,
54f909ca 559 version=self.crypto_version,
774ca538 560 paramversion=self.crypto_paramversion)
133d30da 561 if mode == CRYPTO_MODE_DECRYPT:
1f3fd7b0 562 return crypto.Decrypt (password=password, key=key)
133d30da
PG
563
564 raise Exception ("invalid encryption mode [%r]" % mode)
565
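# Sketch of obtaining the crypto handlers (the password is hypothetical):
#
#     dtar = DeltaTar(mode='#gz', password='example-password')
#     enc = dtar.initialize_encryption(CRYPTO_MODE_ENCRYPT)  # crypto.Encrypt
#     dec = dtar.initialize_encryption(CRYPTO_MODE_DECRYPT)  # crypto.Decrypt
#
# Without a password or crypto_key both calls return None and the tarfile
# layer operates unencrypted.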
566
9eccb1c2 567 def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
3a7e1a50 568 '''
9eccb1c2
PG
569 Given the specified configuration, opens a file for reading or writing,
570 inheriting the encryption and compression settings from the backup.
571 Returns a file object ready to use.
3fdea6d4 572
c8c72fe1
PG
573 :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
574 respectively).
575 :type mode: str
774ca538
PG
576 :param kind: Role of the file, see AUXILIARY_FILE_* constants.
577 Both the info file and the index file have a globally
578 unique, constant counter value.
3fdea6d4 579 :type kind: int
3a7e1a50 580 '''
3a7e1a50
ERE
581 if self.index_mode.startswith('gz'):
582 comptype = 'gz'
583 elif self.index_mode.startswith('bz2'):
584 comptype = 'bz2'
585 else:
586 comptype = 'tar'
587
133d30da 588 crypto_ctx = None
6de9444a 589 enccounter = None
133d30da 590 if mode == "w":
774ca538 591 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 592 elif mode == "r":
774ca538 593 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
133d30da 594
3031b7ae
PG
595 if crypto_ctx is not None:
596 if kind == AUXILIARY_FILE_INFO:
597 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
598 elif kind == AUXILIARY_FILE_INDEX:
599 enccounter = crypto.AES_GCM_IV_CNT_INDEX
600 else:
601 raise Exception ("invalid kind of aux file %r" % kind)
602
c8c72fe1 603 sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
3fdea6d4 604 bufsize=tarfile.RECORDSIZE, fileobj=None,
6de9444a 605 encryption=crypto_ctx, enccounter=enccounter)
c8c72fe1
PG
606
607 return sink
608
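# Reading back an index with the same compression/encryption settings
# (the path is hypothetical):
#
#     f = self.open_auxiliary_file('/backup/bfull-2014-02-05-1610.index.gz',
#                                  mode='r', kind=AUXILIARY_FILE_INDEX)
#     header = f.readline()   # first JSON line of the index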
3a7e1a50 609
0708a374 610 def create_full_backup(self, source_path, backup_path,
d4a05db6 611 max_volume_size=None, extra_data=dict()):
0708a374
ERE
612 '''
613 Creates a full backup.
614
615 Parameters:
616 - source_path: source path to the directory to back up.
617 - backup_path: path where the back up will be stored. Backup path will
618 be created if not existent.
d5361dac
ERE
619 - max_volume_size: maximum volume size in megabytes. Used to split the
620 backup in volumes. Optional (won't split in volumes by default).
9eae9a1f
ERE
621 - extra_data: a json-serializable dictionary with information that you
622 want to be included in the header of the index file
0708a374
ERE
623 '''
624 # check input
be60ffd0 625 if not isinstance(source_path, str):
0708a374
ERE
626 raise Exception('Source path must be a string')
627
be60ffd0 628 if not isinstance(backup_path, str):
0708a374
ERE
629 raise Exception('Backup path must be a string')
630
631 if not os.path.exists(source_path) or not os.path.isdir(source_path):
632 raise Exception('Source path "%s" does not exist or is not a '\
633 'directory' % source_path)
634
d07c8065
ERE
635 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
636 max_volume_size < 1):
637 raise Exception('max_volume_size must be a positive integer')
d5361dac
ERE
638 if max_volume_size != None:
639 max_volume_size = max_volume_size*1024*1024
640
9eae9a1f
ERE
641 if not isinstance(extra_data, dict):
642 raise Exception('extra_data must be a dictionary')
643
644 try:
645 extra_data_str = json.dumps(extra_data)
646 except:
647 raise Exception('extra_data is not json-serializable')
648
0708a374
ERE
649 if not os.access(source_path, os.R_OK):
650 raise Exception('Source path "%s" is not readable' % source_path)
651
652 # try to create backup path if needed
653 if not os.path.exists(backup_path):
d4a05db6 654 os.makedirs(backup_path)
0708a374
ERE
655
656 if not os.access(backup_path, os.W_OK):
657 raise Exception('Backup path "%s" is not writeable' % backup_path)
658
659 if source_path.endswith('/'):
660 source_path = source_path[:-1]
661
662 if backup_path.endswith('/'):
663 backup_path = backup_path[:-1]
664
665 # update current time
666 self.current_time = datetime.datetime.now()
667
668 if self.mode not in self.__file_extensions_dict:
669 raise Exception('Unrecognized extension')
670
2ae46844 671 # setup for encrypting payload
774ca538
PG
672 if self.encryptor is None:
673 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
2ae46844 674
0708a374 675 # some initialization
11684b1d 676 self.vol_no = 0
0708a374
ERE
677
678 # generate the first volume name
679 vol_name = self.volume_name_func(backup_path, True, 0)
680 tarfile_path = os.path.join(backup_path, vol_name)
681
774ca538
PG
682 # init index
683 index_name = self.index_name_func(True)
684 index_path = os.path.join(backup_path, index_name)
685 index_sink = self.open_auxiliary_file(index_path, 'w')
e82f14f5 686
d5361dac
ERE
687 cwd = os.getcwd()
688
b7c47f38 689 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
0708a374
ERE
690 '''
691 Handles the new volumes
692 '''
d5361dac
ERE
693 volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
694 volume_path = os.path.join(backup_path, volume_name)
11684b1d 695 deltarobj.vol_no = volume_number
d5361dac
ERE
696
697 # we convert relative paths into absolute because CWD is changed
698 if not os.path.isabs(volume_path):
699 volume_path = os.path.join(cwd, volume_path)
11684b1d 700
8e019196
ERE
701 if tarobj.fileobj is not None:
702 tarobj.fileobj.close()
703
b008f989
ERE
704 deltarobj.logger.debug("opening volume %s" % volume_path)
705
b7c47f38 706 tarobj.open_volume(volume_path, encryption=encryption)
d5361dac
ERE
707
708 # wraps some args from context into the handler
133d30da 709 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
0708a374 710
774ca538 711 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
6c678f3a 712
be60ffd0 713 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
6c678f3a 714 # calculate checksum and write into the stream
c2ffe2ec 715 crc = binascii.crc32(s) & 0xFFFFffff
774ca538 716 index_sink.write(s)
e82f14f5 717
0708a374
ERE
718 # start creating the tarfile
719 tarobj = tarfile.TarFile.open(tarfile_path,
da26094a 720 mode='w' + self.mode,
0708a374 721 format=tarfile.GNU_FORMAT,
d1c38f40 722 concat='#' in self.mode,
133d30da 723 encryption=self.encryptor,
0708a374 724 max_volume_size=max_volume_size,
ea625b04 725 new_volume_handler=new_volume_handler,
e2b59b34
ERE
726 save_to_members=False,
727 dereference=True)
e5c6ca04 728 os.chdir(source_path)
55b8686d
ERE
729
730 # for each file to be in the backup, do:
e82f14f5 731 for path in self._recursive_walk_dir('.'):
55b8686d 732 # calculate stat dict for current file
253d4cdd
ERE
733 statd = self._stat_dict(path)
734 statd['path'] = u'snapshot://' + statd['path']
735 statd['volume'] = self.vol_no
55b8686d
ERE
736
737 # backup file
253d4cdd 738 tarobj.add(path, arcname = statd['path'], recursive=False)
11684b1d 739
55b8686d 740 # retrieve file offset
253d4cdd 741 statd['offset'] = tarobj.get_last_member_offset()
b008f989 742 self.logger.debug("backup %s" % statd['path'])
6c678f3a 743
d041935c 744 # store the stat dict in the index
be60ffd0 745 s = bytes(json.dumps(statd) + '\n', 'UTF-8')
6c678f3a 746 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 747 index_sink.write(s)
e82f14f5 748
be60ffd0 749 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
6c678f3a 750 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 751 index_sink.write(s)
be60ffd0 752 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
774ca538
PG
753 index_sink.write(s)
754
e5c6ca04 755 os.chdir(cwd)
0708a374 756 tarobj.close()
c8c72fe1 757 index_sink.close (close_fileobj=True)
938c2d54 758
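# Sketch of a full backup with create_full_backup() above (paths are
# hypothetical; volumes are capped at 50 MB):
#
#     dtar = DeltaTar(mode='#gz')
#     dtar.create_full_backup('/srv/data', '/backup/full-2014-02-05',
#                             max_volume_size=50,
#                             extra_data={'comment': 'weekly full'})
#
# The target directory then holds the bfull-*.tar.gz volumes plus the
# bfull-*.index.gz file describing every member.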
0708a374 759 def create_diff_backup(self, source_path, backup_path, previous_index_path,
d4a05db6 760 max_volume_size=None, extra_data=dict()):
0708a374
ERE
761 '''
762 Creates a differential backup.
763
764 Parameters:
765 - source_path: source path to the directory to back up.
766 - backup_path: path where the back up will be stored. Backup path will
767 be created if not existent.
768 - previous_index_path: index of the previous backup, needed to know
769 which files changed since then.
770 - max_volume_size: maximum volume size in megabytes (MB). Used to split
771 the backup in volumes. Optional (won't split in volumes by default).
3a7e1a50
ERE
772
773 NOTE: previous index is assumed to follow exactly the same format as
774 the index_mode setup in the constructor.
0708a374 775 '''
d07c8065 776 # check/sanitize input
be60ffd0 777 if not isinstance(source_path, str):
d07c8065
ERE
778 raise Exception('Source path must be a string')
779
be60ffd0 780 if not isinstance(backup_path, str):
d07c8065
ERE
781 raise Exception('Backup path must be a string')
782
783 if not os.path.exists(source_path) or not os.path.isdir(source_path):
784 raise Exception('Source path "%s" does not exist or is not a '\
785 'directory' % source_path)
786
9eae9a1f
ERE
787 if not isinstance(extra_data, dict):
788 raise Exception('extra_data must be a dictionary')
789
790 try:
791 extra_data_str = json.dumps(extra_data)
792 except:
793 raise Exception('extra_data is not json-serializable')
794
d07c8065
ERE
795 if not os.access(source_path, os.R_OK):
796 raise Exception('Source path "%s" is not readable' % source_path)
797
798 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
799 max_volume_size < 1):
800 raise Exception('max_volume_size must be a positive integer')
801 if max_volume_size != None:
802 max_volume_size = max_volume_size*1024*1024
803
be60ffd0 804 if not isinstance(previous_index_path, str):
d07c8065
ERE
805 raise Exception('previous_index_path must be a string')
806
807 if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
808 raise Exception('Index path "%s" does not exist or is not a '\
809 'file' % previous_index_path)
810
811 if not os.access(previous_index_path, os.R_OK):
812 raise Exception('Index path "%s" is not readable' % previous_index_path)
813
814 # try to create backup path if needed
815 if not os.path.exists(backup_path):
d4a05db6 816 os.makedirs(backup_path)
d07c8065
ERE
817
818 if not os.access(backup_path, os.W_OK):
819 raise Exception('Backup path "%s" is not writeable' % backup_path)
820
821 if source_path.endswith('/'):
822 source_path = source_path[:-1]
823
824 if backup_path.endswith('/'):
825 backup_path = backup_path[:-1]
826
827 # update current time
828 self.current_time = datetime.datetime.now()
829
830 if self.mode not in self.__file_extensions_dict:
831 raise Exception('Unrecognized extension')
832
2ae46844 833 # setup for encrypting payload
774ca538
PG
834 if self.encryptor is None:
835 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 836
d07c8065
ERE
837 # some initialization
838 self.vol_no = 0
839
840 # generate the first volume name
df86af81
ERE
841 vol_name = self.volume_name_func(backup_path, is_full=False,
842 volume_number=0)
d07c8065
ERE
843 tarfile_path = os.path.join(backup_path, vol_name)
844
938c2d54 845 # init index
d07c8065
ERE
846 cwd = os.getcwd()
847
3031b7ae
PG
848 index_name = self.index_name_func(is_full=False)
849 index_path = os.path.join(backup_path, index_name)
850 index_sink = self.open_auxiliary_file(index_path, 'w')
851
d07c8065
ERE
852 def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
853 '''
854 Handles the new volumes
855 '''
df86af81
ERE
856 volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
857 volume_number=volume_number)
d07c8065
ERE
858 volume_path = os.path.join(backup_path, volume_name)
859 deltarobj.vol_no = volume_number
860
861 # we convert relative paths into absolute because CWD is changed
862 if not os.path.isabs(volume_path):
863 volume_path = os.path.join(cwd, volume_path)
864
f624ff3d 865 deltarobj.logger.debug("opening volume %s" % volume_path)
d07c8065
ERE
866 tarobj.open_volume(volume_path)
867
868 # wraps some args from context into the handler
869 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
870
3031b7ae 871 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
d07c8065 872
be60ffd0 873 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
d07c8065 874 # calculate checksum and write into the stream
c2ffe2ec 875 crc = binascii.crc32(s) & 0xFFFFffff
3031b7ae 876 index_sink.write(s)
d07c8065
ERE
877
878 # start creating the tarfile
879 tarobj = tarfile.TarFile.open(tarfile_path,
880 mode='w' + self.mode,
881 format=tarfile.GNU_FORMAT,
d1c38f40 882 concat='#' in self.mode,
133d30da 883 encryption=self.encryptor,
d07c8065 884 max_volume_size=max_volume_size,
ea625b04 885 new_volume_handler=new_volume_handler,
e2b59b34
ERE
886 save_to_members=False,
887 dereference=True)
d07c8065 888
aae127d0
ERE
889
890 # create the iterators, first the previous index iterator, then the
891 # source path directory iterator and collate and iterate them
892 if not os.path.isabs(previous_index_path):
893 previous_index_path = os.path.join(cwd, previous_index_path)
894 index_it = self.iterate_index_path(previous_index_path)
895
d07c8065 896 os.chdir(source_path)
aae127d0
ERE
897 dir_it = self._recursive_walk_dir('.')
898 dir_path_it = self.jsonize_path_iterator(dir_it)
d07c8065 899
df86af81
ERE
900 def pr(path):
901 if not path:
902 return "None"
903 else:
904 return path["path"]
8edb2e3c 905
d07c8065 906 # for each file to be in the backup, do:
df86af81 907 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
aae127d0
ERE
908 action = None
909 # if file is not in the index, it means it's a new file, so we have
910 # to take a snapshot
df86af81 911
aae127d0
ERE
912 if not ipath:
913 action = 'snapshot'
914 # if the file is not in the directory iterator, it means that it has
d041935c 915 # been deleted, so we need to mark it as such
aae127d0
ERE
916 elif not dpath:
917 action = 'delete'
918 # if the file is in both iterators, it means it might have either
919 # not changed (in which case we will just list it in our index but
920 # it will not be included in the tar file), or it might have
e8d95fe5 921 # changed, in which case we will snapshot it.
aae127d0
ERE
922 elif ipath and dpath:
923 if self._equal_stat_dicts(ipath, dpath):
924 action = 'list'
925 else:
926 action = 'snapshot'
927 # TODO: when creating chained backups (i.e. diffing from another
928 # diff), we will need to detect the type of action in the previous
929 # index, because if it was delete and dpath is None, we should
930 # discard the file
931
932 if action == 'snapshot':
933 # calculate stat dict for current file
934 stat = dpath.copy()
be60ffd0 935 stat['path'] = "snapshot://" + dpath['path']
aae127d0
ERE
936 stat['volume'] = self.vol_no
937
50f43227
ERE
938 self.logger.debug("[STORE] %s" % dpath['path'])
939
aae127d0 940 # backup file
8adbe50d 941 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
aae127d0
ERE
942
943 # retrieve file offset
944 stat['offset'] = tarobj.get_last_member_offset()
aae127d0 945 elif action == 'delete':
50f43227 946 path = self.unprefixed(ipath['path'])
aae127d0 947 stat = {
50f43227 948 u'path': u'delete://' + path,
aae127d0
ERE
949 u'type': ipath['type']
950 }
50f43227 951 self.logger.debug("[DELETE] %s" % path)
aae127d0
ERE
952
953 # mark it as deleted in the backup
42d39ca7 954 tarobj.add("/dev/null", arcname=stat['path'])
aae127d0
ERE
955 elif action == 'list':
956 stat = dpath.copy()
50f43227
ERE
957 path = self.unprefixed(ipath['path'])
958 stat['path'] = u'list://' + path
aae127d0 959 # unchanged files do not enter in the backup, only in the index
50f43227 960 self.logger.debug("[UNCHANGED] %s" % path)
80910564
TJ
961 else:
962 # should not happen
4bda6f45 963 self.logger.warning('unknown action in create_diff_backup: {0}'
80910564
TJ
964 ''.format(action))
965 stat = None
aae127d0 966
80910564
TJ
967 if stat:
968 # store the stat dict in the index
be60ffd0 969 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
aae127d0 970 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 971 index_sink.write(s)
aae127d0 972
be60ffd0 973 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
aae127d0 974 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 975 index_sink.write(s)
be60ffd0 976 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
3031b7ae 977 index_sink.write(s)
938c2d54 978
df86af81 979 index_it.release()
aae127d0
ERE
980 os.chdir(cwd)
981 tarobj.close()
938c2d54
PG
982 index_sink.close()
983
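# Sketch of a diff backup with create_diff_backup() above, against the index
# written by the previous full backup (paths are hypothetical):
#
#     dtar = DeltaTar(mode='#gz')
#     dtar.create_diff_backup('/srv/data', '/backup/diff-2014-02-06',
#         '/backup/full-2014-02-05/bfull-2014-02-05-1610.index.gz')
#
# Only new or changed files are stored; unchanged files become "list://"
# entries and removed files become "delete://" entries in the new index.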
984
d07c8065 985 def iterate_index_path(self, index_path):
df86af81
ERE
986 '''
987 Returns an index iterator. Internally, it uses a classic iterator class.
988 We do that instead of just yielding so that the iterator object can have
989 an additional function to close the file descriptor that is opened in
990 the constructor.
991 '''
d07c8065 992
df86af81
ERE
993 class IndexPathIterator(object):
994 def __init__(self, delta_tar, index_path):
995 self.delta_tar = delta_tar
996 self.index_path = index_path
997 self.f = None
9eae9a1f 998 self.extra_data = dict()
df86af81 999 self.__enter__()
d07c8065 1000
df86af81
ERE
1001 def __iter__(self):
1002 return self
d07c8065 1003
df86af81
ERE
1004 def release(self):
1005 if self.f:
1006 self.f.close()
1007
1008 def __enter__(self):
1009 '''
1010 Allows this iterator to be used with the "with" statement
1011 '''
1012 if self.f is None:
9eccb1c2 1013 self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
df86af81
ERE
1014 # check index header
1015 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1016 if j.get("type", '') != 'python-delta-tar-index' or\
1017 j.get('version', -1) != 1:
1018 raise Exception("invalid index file format: %s" % json.dumps(j))
1019
9eae9a1f
ERE
1020 self.extra_data = j.get('extra_data', dict())
1021
df86af81
ERE
1022 # find BEGIN-FILE-LIST, ignore other headers
1023 while True:
1024 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1025 if j.get('type', '') == 'BEGIN-FILE-LIST':
1026 break
1027 return self
1028
1029 def __exit__(self, type, value, tb):
1030 '''
1031 Allows this iterator to be used with the "with" statement
1032 '''
ec57ce53
ERE
1033 if self.f:
1034 self.f.close()
df86af81 1035 self.f = None
d07c8065 1036
be60ffd0 1037 def __next__(self):
0349168a 1038 # read each file in the index and process it to do the restore
df86af81
ERE
1039 j = {}
1040 l_no = -1
1041 try:
1042 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
be60ffd0 1043 except Exception as e:
df86af81
ERE
1044 if self.f:
1045 self.f.close()
1046 raise e
d07c8065 1047
df86af81 1048 op_type = j.get('type', '')
d07c8065 1049
df86af81
ERE
1050 # when we detect the end of the list, break the loop
1051 if op_type == 'END-FILE-LIST':
1052 if self.f:
1053 self.f.close()
1054 raise StopIteration
1055
1056 # check input
1057 if op_type not in ['directory', 'file', 'link']:
4bda6f45 1058 self.delta_tar.logger.warning('unrecognized type to be '
df86af81
ERE
1059 'restored: %s, line %d' % (op_type, l_no))
1060 # iterate again
be60ffd0 1061 return self.__next__()
df86af81
ERE
1062
1063 return j, l_no
d07c8065 1064
df86af81 1065 return IndexPathIterator(self, index_path)
d07c8065 1066
26fdd428 1067 def iterate_tar_path(self, tar_path, new_volume_handler=None):
24ddf0a2
ERE
1068 '''
1069 Returns a tar iterator that iterates jsonized member items that contain
1070 an additional "member" field, used by RestoreHelper.
1071 '''
ec57ce53 1072 class TarPathIterator(object):
83a81852 1073 def __init__(self, delta_tar, tar_path, new_volume_handler=None):
24ddf0a2 1074 self.delta_tar = delta_tar
ec57ce53 1075 self.tar_path = tar_path
24ddf0a2 1076 self.tar_obj = None
6bca471c 1077 self.last_member = None
26fdd428 1078 self.new_volume_handler = new_volume_handler
24ddf0a2
ERE
1079 self.__enter__()
1080
1081 def __iter__(self):
1082 return self
1083
1084 def release(self):
1085 if self.tar_obj:
1086 self.tar_obj.close()
1087
1088 def __enter__(self):
1089 '''
1090 Allows this iterator to be used with the "with" statement
1091 '''
1092 if self.tar_obj is None:
d5e1d60f
PG
1093 decryptor = None
1094 if self.delta_tar.password is not None:
1f3fd7b0
PG
1095 decryptor = crypto.Decrypt \
1096 (password=self.delta_tar.password,
1097 key=self.delta_tar.crypto_key)
ec57ce53
ERE
1098 self.tar_obj = tarfile.TarFile.open(self.tar_path,
1099 mode='r' + self.delta_tar.mode,
1100 format=tarfile.GNU_FORMAT,
d1c38f40 1101 concat='#' in self.delta_tar.mode,
d5e1d60f 1102 encryption=decryptor,
83a81852 1103 new_volume_handler=self.new_volume_handler,
e2b59b34
ERE
1104 save_to_members=False,
1105 dereference=True)
24ddf0a2
ERE
1106 return self
1107
1108 def __exit__(self, type, value, tb):
1109 '''
1110 Allows this iterator to be used with the "with" statement
1111 '''
ec57ce53
ERE
1112 if self.tar_obj:
1113 self.tar_obj.close()
24ddf0a2
ERE
1114 self.tar_obj = None
1115
be60ffd0 1116 def __next__(self):
24ddf0a2
ERE
1117 '''
1118 Read each member and return it as a stat dict
1119 '''
be60ffd0 1120 tarinfo = self.tar_obj.__iter__().__next__()
8e019196
ERE
1121 # NOTE: here we compare if tarinfo.path is the same as before
1122 # instead of comparing the tarinfo object itself because the
1123 # object itself might change for multivol tarinfos
1124 if tarinfo is None or (self.last_member is not None and\
1125 self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
ec57ce53
ERE
1126 raise StopIteration
1127
6bca471c
ERE
1128 self.last_member = tarinfo
1129
24ddf0a2
ERE
1130 ptype = 'unknown'
1131 if tarinfo.isfile():
1132 ptype = 'file'
1133 elif tarinfo.isdir():
ab7e7465 1134 ptype = 'directory'
24ddf0a2
ERE
1135 elif tarinfo.islnk() or tarinfo.issym():
1136 ptype = 'link'
1137
1138 return {
1139 u'type': ptype,
1140 u'path': tarinfo.path,
1141 u'mode': tarinfo.mode,
1142 u'mtime': tarinfo.mtime,
1143 u'ctime': -1, # cannot restore
1144 u'uid': tarinfo.uid,
1145 u'gid': tarinfo.gid,
1146 u'inode': -1, # cannot restore
1147 u'size': tarinfo.size,
1148 u'member': tarinfo
ec57ce53
ERE
1149 }, 0
1150
26fdd428 1151 return TarPathIterator(self, tar_path, new_volume_handler)
24ddf0a2 1152
df99a044 1153 def jsonize_path_iterator(self, iter, strip=0):
d07c8065
ERE
1154 '''
1155 converts the yielded items of an iterator into json path lines.
df99a044
ERE
1156
1157 strip: Strip the smallest prefix containing `strip` leading slashes from
1158 the file path.
d07c8065
ERE
1159 '''
1160 while True:
1161 try:
be60ffd0 1162 path = iter.__next__()
df99a044 1163 if strip == 0:
4ac6d333 1164 yield self._stat_dict(path), 0
df99a044
ERE
1165 else:
1166 st = self._stat_dict(path)
1167 st['path'] = "/".join(path.split("/")[strip:])
4ac6d333 1168 yield st, 0
d07c8065
ERE
1169 except StopIteration:
1170 break
1171
b84beea7
PG
1172 def iterate_disaster_index (self, index):
1173 """
1174 Mimic the behavior of the other object iterators, just with the inputs
1175 supplied directly as *index*.
1176 """
1177
1178 class RawIndexIterator(object):
65b35c42 1179 def __init__(self, delta_tar, index):
b84beea7
PG
1180 self.delta_tar = delta_tar
1181 self.index = index
1182 self.__enter__()
1183
1184 def __iter__(self):
1185 return self
1186
1187 def release(self):
65b35c42 1188 pass
b84beea7
PG
1189
1190 def __enter__(self):
1191 '''
1192 Allows this iterator to be used with the "with" statement
1193 '''
1194 self.iter = self.index.__iter__ ()
1195 return self
1196
1197 def __exit__(self, type, value, tb):
1198 '''
1199 Allows this iterator to be used with the "with" statement
1200 '''
1201
1202 def __next__(self):
1203 idxent = self.iter.__next__ ()
65b35c42 1204 return idxent, 0
b84beea7
PG
1205
1206 return RawIndexIterator(self, index)
1207
d07c8065
ERE
1208 def collate_iterators(self, it1, it2):
1209 '''
1210 Collate two iterators, so that it returns pairs of the items of each
1211 iterator (if the items are the same), or (None, elem2) or (elem1, None)
1212 when there's no match for the items in the other iterator.
1213
1214 It assumes that the items in both lists are ordered in the same way.
1215 '''
ea6d3c3e 1216 l_no = 0
d07c8065
ERE
1217 elem1, elem2 = None, None
1218 while True:
1219 if not elem1:
1220 try:
be60ffd0 1221 elem1, l_no = it1.__next__()
d07c8065
ERE
1222 except StopIteration:
1223 if elem2:
ea6d3c3e 1224 yield (None, elem2, l_no)
d07c8065 1225 for elem2 in it2:
ea6d3c3e
ERE
1226 if isinstance(elem2, tuple):
1227 elem2 = elem2[0]
1228 yield (None, elem2, l_no)
d07c8065 1229 break
d07c8065
ERE
1230 if not elem2:
1231 try:
be60ffd0 1232 elem2 = it2.__next__()
d07c8065
ERE
1233 if isinstance(elem2, tuple):
1234 elem2 = elem2[0]
1235 except StopIteration:
1236 if elem1:
ea6d3c3e 1237 yield (elem1, None, l_no)
df99a044 1238 for elem1, l_no in it1:
ea6d3c3e 1239 yield (elem1, None, l_no)
d07c8065 1240 break
670f9934
ERE
1241
1242 index1 = self.unprefixed(elem1['path'])
1243 index2 = self.unprefixed(elem2['path'])
1244 i1, i2 = self.compare_indexes(index1, index2)
1245
1246 yield1 = yield2 = None
1247 if i1 is not None:
1248 yield1 = elem1
1249 elem1 = None
1250 if i2 is not None:
1251 yield2 = elem2
1252 elem2 = None
1253 yield (yield1, yield2, l_no)
1254
1255 def compare_indexes(self, index1, index2):
1256 '''
1257 Compare iterator indexes and return a tuple in the following form:
1258 if index1 < index2, returns (index1, None)
1259 if index1 == index2 returns (index1, index2)
1260 else: returns (None, index2)
1261 '''
1262 l1 = index1.split('/')
1263 l2 = index2.split('/')
1264 length = len(l2) - len(l1)
1265
1266 if length > 0:
1267 return (index1, None)
1268 elif length < 0:
1269 return (None, index2)
1270
1271 for i1, i2 in zip(l1, l2):
1272 if i1 < i2:
1273 return (index1, None)
1274 elif i1 > i2:
1275 return (None, index2)
1276
1277 return (index1, index2)
0708a374 1278
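# compare_indexes() in short: paths with fewer components sort first,
# otherwise the comparison is component-wise:
#
#     compare_indexes('etc', 'etc/passwd')        -> ('etc', None)
#     compare_indexes('etc/passwd', 'etc/passwd') -> ('etc/passwd', 'etc/passwd')
#     compare_indexes('usr/bin', 'etc/passwd')    -> (None, 'etc/passwd')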
8c65a2b1 1279 def list_backup(self, backup_tar_path, list_func=None):
be60ffd0 1280 if not isinstance(backup_tar_path, str):
8c65a2b1
ERE
1281 raise Exception('Backup tar path must be a string')
1282
1283 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1284 raise Exception('Source path "%s" does not exist or is not a '\
1285 'file' % backup_tar_path)
1286
1287 if not os.access(backup_tar_path, os.R_OK):
1288 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1289
1290 cwd = os.getcwd()
1291
b7c47f38 1292 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
8c65a2b1
ERE
1293 '''
1294 Handles the new volumes
1295 '''
1296 volume_name = deltarobj.volume_name_func(backup_path, True,
1297 volume_number, guess_name=True)
1298 volume_path = os.path.join(backup_path, volume_name)
1299
1300 # we convert relative paths into absolute because CWD is changed
1301 if not os.path.isabs(volume_path):
1302 volume_path = os.path.join(cwd, volume_path)
b7c47f38
PG
1303 tarobj.open_volume(volume_path, encryption=encryption)
1304
774ca538
PG
1305 if self.decryptor is None:
1306 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
8c65a2b1
ERE
1307
1308 backup_path = os.path.dirname(backup_tar_path)
1309 if not os.path.isabs(backup_path):
1310 backup_path = os.path.join(cwd, backup_path)
133d30da 1311 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)
b7a6566b 1312
8c65a2b1
ERE
1313 tarobj = tarfile.TarFile.open(backup_tar_path,
1314 mode='r' + self.mode,
1315 format=tarfile.GNU_FORMAT,
d1c38f40 1316 concat='#' in self.mode,
133d30da 1317 encryption=self.decryptor,
ea625b04 1318 new_volume_handler=new_volume_handler,
e2b59b34
ERE
1319 save_to_members=False,
1320 dereference=True)
8c65a2b1
ERE
1321
1322 def filter(cls, list_func, tarinfo):
1323 if list_func is None:
b008f989 1324 self.logger.info(tarinfo.path)
8c65a2b1
ERE
1325 else:
1326 list_func(tarinfo)
1327 return False
1328 filter = partial(filter, self, list_func)
1329
1330 tarobj.extractall(filter=filter)
1331 tarobj.close()
1332
0708a374 1333 def restore_backup(self, target_path, backup_indexes_paths=[],
e93f83f1 1334 backup_tar_path=None, restore_callback=None,
b84beea7 1335 disaster=tarfile.TOLERANCE_STRICT, backup_index=None):
0708a374
ERE
1336 '''
1337 Restores a backup.
1338
1339 Parameters:
0708a374
ERE
1340 - target_path: path to restore.
1341 - backup_indexes_paths: path to backup indexes, in descending date order.
1342 The indexes indicate the location of their respective backup volumes,
1343 and multiple indexes are needed to be able to restore diff backups.
1344 Note that this is an optional parameter: if not supplied, it will
1345 try to restore directly from backup_tar_path.
1346 - backup_tar_path: path to the backup tar file. Used as an alternative
1347 to backup_indexes_paths to restore directly from a tar file without
1348 using any file index. If it's a multivol tarfile, volume_name_func
1349 will be called.
4da27cfe 1350 - restore_callback: callback function to be called during restore.
b0aef801 1351 This is passed to the helper and gets called for every file.
11684b1d 1352
3a7e1a50 1353 NOTE: If you want to use an index to restore a backup, this function
11684b1d
ERE
1354 only supports doing so when the tarfile mode is either uncompressed or
1355 uses concat compression mode, because otherwise it would be very slow.
3a7e1a50
ERE
1356
1357 NOTE: Indices are assumed to follow the same format as the index_mode
1358 specified in the constructor.
e93f83f1
PG
1359
1360 Returns the list of files that could not be restored, if there were
1361 any.
0708a374 1362 '''
11684b1d 1363 # check/sanitize input
be60ffd0 1364 if not isinstance(target_path, str):
e5c6ca04
ERE
1365 raise Exception('Target path must be a string')
1366
11684b1d
ERE
1367 if not backup_indexes_paths and backup_tar_path is None:
1368 raise Exception("You have to either provide index paths or a tar path")
e5c6ca04 1369
b84beea7
PG
1370 if isinstance (backup_index, list) is True:
1371 mode = "disaster"
1372 elif len(backup_indexes_paths) == 0:
ea6d3c3e
ERE
1373 mode = "tar"
1374 else:
1375 mode = "diff"
1376
1377 if mode == "tar":
be60ffd0 1378 if not isinstance(backup_tar_path, str):
11684b1d
ERE
1379 raise Exception('Backup tar path must be a string')
1380
1381 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1382 raise Exception('Source path "%s" does not exist or is not a '\
1383 'file' % backup_tar_path)
1384
1385 if not os.access(backup_tar_path, os.R_OK):
1386 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1387 else:
1388 if not isinstance(backup_indexes_paths, list):
1389 raise Exception('backup_indexes_paths must be a list')
1390
1391 if self.mode.startswith(':') or self.mode.startswith('|'):
1392 raise Exception('Restore only supports either uncompressed tars'
1393 ' or concat compression when restoring from an index, and '
1394 ' the open mode you provided is "%s"' % self.mode)
1395
1396 for index in backup_indexes_paths:
be60ffd0 1397 if not isinstance(index, str):
11684b1d 1398 raise Exception('indices must be strings')
e5c6ca04 1399
11684b1d
ERE
1400 if not os.path.exists(index) or not os.path.isfile(index):
1401 raise Exception('Index path "%s" does not exist or is not a '\
1402 'file' % index)
1403
1404 if not os.access(index, os.R_OK):
1405 raise Exception('Index path "%s" is not readable' % index)
e5c6ca04
ERE
1406
1407 # try to create backup path if needed
1408 if not os.path.exists(target_path):
1409 os.makedirs(target_path)
1410
ec57ce53
ERE
1411 # make backup_tar_path absolute so that iterate_tar_path works fine
1412 if backup_tar_path and not os.path.isabs(backup_tar_path):
1413 backup_tar_path = os.path.abspath(backup_tar_path)
1414
d5361dac 1415 cwd = os.getcwd()
ec57ce53 1416 os.chdir(target_path)
d5361dac 1417
2ae46844 1418 # setup for decrypting payload
774ca538
PG
1419 if self.decryptor is None:
1420 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
2ae46844 1421
ea6d3c3e 1422 if mode == 'tar':
24ddf0a2
ERE
1423 index_it = self.iterate_tar_path(backup_tar_path)
1424 helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
ec57ce53 1425 tarobj=index_it.tar_obj)
ea6d3c3e 1426 elif mode == "diff":
04f4c7ab
PG
1427 helper = RestoreHelper(self, cwd, backup_indexes_paths,
1428 disaster=disaster)
f3d10816
PG
1429 try:
1430 # get iterator from newest index at _data[0]
1431 index1 = helper._data[0]["path"]
1432 index_it = self.iterate_index_path(index1)
1433 except tarfile.DecryptionError as exn:
1434 self.logger.error("failed to decrypt file [%s]: %s; is this an "
afc87ebc
PG
1435 "actual encrypted index file?"
1436 % (index1, str (exn)))
1437 return [(index1, exn)]
1438 except Exception as exn:
1439 # compressed files
1440 self.logger.error("failed to read file [%s]: %s; is this an "
1441 "actual index file?" % (index1, str (exn)))
f3d10816 1442 return [(index1, exn)]
b84beea7
PG
1443 elif mode == "disaster":
1444 index_it = self.iterate_disaster_index (backup_index)
65b35c42
PG
1445 helper = RestoreHelper (self, cwd, backup_path=backup_tar_path,
1446 backup_index=backup_index,
1447 disaster=disaster)
b84beea7 1448
d07c8065 1449
24ddf0a2
ERE
1450 dir_it = self._recursive_walk_dir('.')
1451 dir_path_it = self.jsonize_path_iterator(dir_it)
11684b1d 1452
e93f83f1
PG
1453 failed = [] # irrecoverable files
1454
a395759e 1455 # for each file to be restored, do:
24ddf0a2
ERE
1456 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
1457 if not ipath:
1458 upath = dpath['path']
1459 op_type = dpath['type']
1460 else:
1461 upath = self.unprefixed(ipath['path'])
1462 op_type = ipath['type']
42c04ead 1463
24ddf0a2 1464 # filter paths
75059f3c 1465 if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
24ddf0a2 1466 continue
ea6d3c3e 1467
24ddf0a2
ERE
1468 # if types of the file mismatch, the file needs to be deleted
1469 # and re-restored
1470 if ipath is not None and dpath is not None and\
1471 dpath['type'] != ipath['type']:
1472 helper.delete(upath)
1473
1474 # if file not found in dpath, we can directly restore from index
1475 if not dpath:
1476 # if the file doesn't exist and it needs to be deleted, it
1477 # means that work is already done
1478 if ipath['path'].startswith('delete://'):
ea6d3c3e 1479 continue
24ddf0a2 1480 try:
b008f989 1481 self.logger.debug("restore %s" % ipath['path'])
4da27cfe 1482 helper.restore(ipath, l_no, restore_callback)
be60ffd0 1483 except Exception as e:
e93f83f1 1484 iipath = ipath.get ("path", "")
7b07645e 1485 self.logger.error("FAILED to restore: {} ({})"
e93f83f1 1486 .format(iipath, e))
04f4c7ab 1487 if disaster != tarfile.TOLERANCE_STRICT:
e93f83f1 1488 failed.append ((iipath, e))
24ddf0a2 1489 continue
11684b1d 1490
24ddf0a2
ERE
1491 # if both files are equal, we have nothing to restore
1492 if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
1493 continue
1494
1495 # we have to restore the file, but first we need to delete the
1496 # current existing file.
1497 # we don't delete the file if it's a directory, because it might
1498 # just have changed mtime, so it's quite inefficient to remove
1499 # it
1500 if ipath:
1501 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
42c04ead 1502 helper.delete(upath)
b008f989 1503 self.logger.debug("restore %s" % ipath['path'])
e93f83f1
PG
1504 try:
1505 helper.restore(ipath, l_no, restore_callback)
1506 except Exception as e:
04f4c7ab 1507 if disaster == tarfile.TOLERANCE_STRICT:
e93f83f1
PG
1508 raise
1509 failed.append ((ipath.get ("path", ""), e))
1510 continue
24ddf0a2
ERE
1511
1512 # if the file is not in the index (so it comes from the target
1513 # directory) then we have to delete it
1514 else:
c9d47a03 1515 self.logger.debug("delete %s" % upath)
24ddf0a2 1516 helper.delete(upath)
42c04ead 1517
ec57ce53
ERE
1518 helper.restore_directories_permissions()
1519 index_it.release()
1520 os.chdir(cwd)
1521 helper.cleanup()
ea6d3c3e 1522
e93f83f1
PG
1523 return failed
1524
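# Sketch of restoring from an index chain with restore_backup() above,
# newest index first (paths are hypothetical):
#
#     dtar = DeltaTar(mode='#gz')
#     failed = dtar.restore_backup('/srv/data.restored',
#         backup_indexes_paths=[
#             '/backup/diff-2014-02-06/bdiff-2014-02-06-1610.index.gz',
#             '/backup/full-2014-02-05/bfull-2014-02-05-1610.index.gz',
#         ])
#
# Restoring a single archive directly works via backup_tar_path instead. The
# return value is a list of (path, exception) pairs for files that could not
# be restored in the tolerant modes.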
1525
1526 def recover_backup(self, target_path, backup_indexes_paths=[],
1527 restore_callback=None):
1528 """
1529 Walk the index, extracting objects in disaster mode. Bad files are
1530 reported along with a reason.
1531 """
1532 return self.restore_backup(target_path,
1533 backup_indexes_paths=backup_indexes_paths,
04f4c7ab
PG
1534 disaster=tarfile.TOLERANCE_RECOVER)
1535
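    # A minimal usage sketch (not part of the original source; the constructor
    # arguments and paths below are assumptions for illustration only):
    #
    #     dtar = DeltaTar(mode="#gz", password="secret", logger=None)
    #     failed = dtar.recover_backup("/tmp/restore",
    #                                  backup_indexes_paths=["backup/index.gz"])
    #     for path, reason in failed:
    #         print("could not recover %s: %s" % (path, reason))
    #
    # recover_backup() delegates to restore_backup() with the tolerance set to
    # TOLERANCE_RECOVER and returns the (path, exception) pairs collected there.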
1536
6690f5e0 1537 def rescue_backup(self, target_path, backup_tar_path,
04f4c7ab
PG
1538 restore_callback=None):
1539 """
1540 More aggressive “unfsck” mode: do not rely on the index data as the
1541 files may be corrupt; skim files for header-like information and
1542 attempt to retrieve the data.
1543 """
b84beea7
PG
1544 backup_index = tarfile.gen_rescue_index(backup_tar_path,
1545 self.mode,
1546 password=self.password,
1547 key=self.crypto_key)
6690f5e0 1548
04f4c7ab 1549 return self.restore_backup(target_path,
b84beea7 1550 backup_index=backup_index,
65b35c42 1551 backup_tar_path=backup_tar_path,
04f4c7ab 1552 disaster=tarfile.TOLERANCE_RESCUE)
e93f83f1
PG
1553
1554
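    # A minimal usage sketch (not in the original source; the volume path is
    # made up). Unlike recover_backup(), no index file is required: the index
    # is reconstructed from the volume itself via tarfile.gen_rescue_index():
    #
    #     failed = dtar.rescue_backup("/tmp/restore",
    #                                 "backup_dir/volume-0.tar.gz")
    #     # entries that could not be salvaged come back as (path, exception)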
11684b1d
ERE
1555 def _parse_json_line(self, f, l_no):
1556 '''
ee0e095f 1557 Read a line from a file-like object and parse it as JSON.
11684b1d
ERE
1558 '''
1559 l = f.readline()
1560 l_no += 1
1561 try:
be60ffd0 1562 j = json.loads(l.decode('UTF-8'))
ee0e095f
PG
1563 except UnicodeDecodeError as e:
1564 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1565 raise Exception \
1566 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1567 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1568 from e
1569 raise Exception \
1570 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1571 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1572 from e
be60ffd0 1573 except ValueError as e:
11684b1d
ERE
1574 raise Exception("error parsing this json line "
1575 "(line number %d): %s" % (l_no, l))
1576 return j, l_no
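    # Illustrative sketch (not part of the original code): callers thread the
    # running line counter through successive calls; the file name is made up.
    #
    #     with open("backup/index", "rb") as f:
    #         j, l_no = self._parse_json_line(f, 0)     # first line
    #         j, l_no = self._parse_json_line(f, l_no)  # next line
    #
    # The file must be opened in binary mode: the raw line is decoded as UTF-8
    # here, and its first bytes are checked for the gzip magic number so that
    # compressed input produces a more helpful error message.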
ea6d3c3e 1577
24ddf0a2 1578
ea6d3c3e
ERE
1579class RestoreHelper(object):
1580 '''
1581 Class used to help to restore files from indices
1582 '''
1583
1584 # holds the dicts of data
1585 _data = []
1586
1587 _deltatar = None
1588
1589 _cwd = None
1590
0501fe0a
ERE
1591 # list of directories to be restored. This is done as a last step, see
1592 # tarfile.extractall for details.
1593 _directories = []
1594
04f4c7ab 1595 _disaster = tarfile.TOLERANCE_STRICT
e93f83f1 1596
037994ca 1597 def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
65b35c42
PG
1598 backup_index=None, tarobj=None,
1599 disaster=tarfile.TOLERANCE_STRICT):
ea6d3c3e
ERE
1600 '''
1601 Constructor opens the tars and initializes the data structures.
1602
037994ca
PG
1603 Assumptions:
1604
1605 - Index list must be provided in reverse order (newer first).
1606 - “newer first” apparently means that if there are n backups
1607 provided, the last full backup is at index n-1 and the most recent
1608 diff backup is at index 0.
1609 - Only the first, the second, and the last elements of
1610 ``index_list`` are relevant, others will not be accessed.
1611 - If no ``index_list`` is provided, both ``tarobj`` and
1612 ``backup_path`` must be passed.
1613 - If ``index_list`` is provided, the values of ``tarobj`` and
1614 ``backup_path`` are ignored.
ea6d3c3e
ERE
1615 '''
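        # Illustrative example (an assumption based on the docstring above;
        # file names are made up): with one full backup and two later diff
        # backups the caller passes the newest index first and the full
        # backup's index last,
        #
        #     index_list = [ "diff-2/index.gz",   # newest diff -> _data[0]
        #                    "diff-1/index.gz",
        #                    "full/index.gz" ]    # last full   -> _data[-1]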
1616 self._data = []
0501fe0a 1617 self._directories = []
ea6d3c3e
ERE
1618 self._deltatar = deltatar
1619 self._cwd = cwd
3031b7ae 1620 self._password = deltatar.password
1f3fd7b0 1621 self._crypto_key = deltatar.crypto_key
3031b7ae 1622 self._decryptors = []
e93f83f1 1623 self._disaster = disaster
ea6d3c3e 1624
253d4cdd
ERE
1625 try:
1626 import grp, pwd
1627 except ImportError:
1628 grp = pwd = None
1629
1630 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1631 self.canchown = True
1632 else:
1633 self.canchown = False
1634
65b35c42 1635 if isinstance (backup_index, list) is True:
001bd488 1636 decryptor = self._deltatar.decryptor
65b35c42
PG
1637 self._data = \
1638 [{ "curr_vol_no" : None
1639 , "vol_fd" : None
1640 , "offset" : -1
1641 , "tarobj" : None
1642 , "path" : backup_path
1643 , "is_full" : True
1644 , "iterator" : None
1645 , "last_itelement" : None
1646 , "last_lno" : 0
001bd488
PG
1647 , "new_volume_handler" :
1648 partial(self.new_volume_handler,
1649 self._deltatar, self._cwd, True,
1650 os.path.dirname(backup_path), decryptor)
1651 , "decryptor" : decryptor
65b35c42
PG
1652 }]
1653 elif index_list is not None:
24ddf0a2 1654 for index in index_list:
037994ca 1655 is_full = index == index_list[-1]
24ddf0a2 1656
d5e1d60f 1657 decryptor = None
3031b7ae 1658 if self._password is not None:
1f3fd7b0
PG
1659 decryptor = crypto.Decrypt (password=self._password,
1660 key=self._crypto_key)
d5e1d60f 1661
24ddf0a2
ERE
1662 # make paths absolute to avoid cwd problems
1663 if not os.path.isabs(index):
1664 index = os.path.normpath(os.path.join(cwd, index))
1665
1666 s = dict(
1667 curr_vol_no = None,
1668 vol_fd = None,
1669 offset = -1,
1670 tarobj = None,
1671 path = index,
1672 is_full = is_full,
1673 iterator = None,
1674 last_itelement = None,
1675 last_lno = 0,
1676 new_volume_handler = partial(self.new_volume_handler,
1677 self._deltatar, self._cwd, is_full,
d5e1d60f
PG
1678 os.path.dirname(index), decryptor),
1679 decryptor = decryptor
24ddf0a2
ERE
1680 )
1681 self._data.append(s)
1682 else:
ea6d3c3e 1683 # make paths absolute to avoid cwd problems
24ddf0a2
ERE
1684 if not os.path.isabs(backup_path):
1685 backup_path = os.path.normpath(os.path.join(cwd, backup_path))
ea6d3c3e 1686
ec57ce53
ERE
1687 # update the new_volume_handler of tar_obj
1688 tarobj.new_volume_handler = partial(self.new_volume_handler,
b7c47f38 1689 self._deltatar, self._cwd, True, os.path.dirname(backup_path),
133d30da 1690 self._deltatar.decryptor)
ea6d3c3e
ERE
1691 s = dict(
1692 curr_vol_no = None,
1693 vol_fd = None,
1694 offset = -1,
24ddf0a2
ERE
1695 tarobj = tarobj,
1696 path = backup_path,
1697 is_full = True,
670f9934
ERE
1698 iterator = None,
1699 last_itelement = None,
1700 last_lno = 0,
d5e1d60f
PG
1701 new_volume_handler = tarobj.new_volume_handler,
1702 decryptor = self._deltatar.decryptor
ea6d3c3e
ERE
1703 )
1704 self._data.append(s)
1705
3031b7ae 1706
ea6d3c3e
ERE
1707 def cleanup(self):
1708 '''
1709 Closes all open files
1710 '''
1711 for data in self._data:
55b2ffd0
ERE
1712 if data['vol_fd']:
1713 data['vol_fd'].close()
1714 data['vol_fd'] = None
ea6d3c3e
ERE
1715 if data['tarobj']:
1716 data['tarobj'].close()
1717 data['tarobj'] = None
ea6d3c3e
ERE
1718
1719 def delete(self, path):
1720 '''
1721 Delete a file
1722 '''
df99a044
ERE
1723 if not os.path.exists(path):
1724 return
1725
24ddf0a2 1726 # to preserve parent directory mtime, we save it
283fbd5e 1727 parent_dir = os.path.dirname(path) or os.getcwd()
24ddf0a2
ERE
1728 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1729
561bc39f 1730 if os.path.isdir(path) and not os.path.islink(path):
ea6d3c3e
ERE
1731 shutil.rmtree(path)
1732 else:
1733 os.unlink(path)
1734
24ddf0a2
ERE
1735 # now we restore parent_directory mtime
1736 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1737
4da27cfe 1738 def restore(self, itpath, l_no, callback=None):
ea6d3c3e 1739 '''
8a54d5dd 1740 Restore the path from the appropriate backup. Receives the current path
e8d95fe5 1741 from the newest (=first) index iterator. itpath must not be None.
b0aef801 1742 callback is a custom function that gets called for every file.
037994ca
PG
1743
1744 NB: This function takes the attribute ``_data`` as input but will only
1745 ever use its first and, if available, second element. Anything else in
1746 ``._data[]`` will be ignored.
ea6d3c3e 1747 '''
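        # Illustrative index entry prefixes handled below (paths are made up,
        # only the prefixes matter):
        #
        #     delete://dir/file     file was removed; nothing is extracted
        #     snapshot://dir/file   file content is contained in this backup
        #     list://dir/file       file is unchanged; look in an older index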
ea6d3c3e
ERE
1748 path = itpath['path']
1749
4da27cfe
SA
1750 # Calls the callback function
1751 if callback:
1752 callback()
1753
ea6d3c3e 1754 if path.startswith('delete://'):
df86af81
ERE
1755 # the file has already been deleted in restore_backup in all cases,
1756 # so we just need to finish
ea6d3c3e 1757 return
df86af81 1758
e8d95fe5 1759 # get data from newest index (_data[0])
df86af81
ERE
1760 data = self._data[0]
1761 upath = self._deltatar.unprefixed(path)
1762
24ddf0a2 1763 # to preserve parent directory mtime, we save it
283fbd5e 1764 parent_dir = os.path.dirname(upath) or os.getcwd()
ec57ce53
ERE
1765 if not os.path.exists(parent_dir):
1766 os.makedirs(parent_dir)
24ddf0a2
ERE
1767 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1768
e8d95fe5 1769 # if path is found in the newest index as to be snapshotted, deal with it
df86af81
ERE
1770 # and finish
1771 if path.startswith('snapshot://'):
65b35c42 1772 self.restore_file(itpath, data, path, l_no, upath)
24ddf0a2
ERE
1773
1774 # now we restore parent_directory mtime
1775 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
ea6d3c3e
ERE
1776 return
1777
1778 # we go from index to index, finding the path in the index, then finding
1779 # the index with the most recent snapshot of the file being restored
e8d95fe5
TJ
1780 #
1781 # Right now we support diff backups, only. No incremental backups.
1782 # As a result _data[0] is always the diff backup index
1783 # and _data[1] the full backup index.
527670c4 1784 if len(self._data) == 2:
7273719c 1785 data = self._data[1]
527670c4
TJ
1786 d, l_no, dpath = self.find_path_in_index(data, upath)
1787 if not d:
1788 self._deltatar.logger.warning('Error restoring file %s from '
1789 'index, not found in index %s' % (path, data['path']))
1790 return
1791
1792 cur_path = d.get('path', '')
1793 if cur_path.startswith('delete://'):
1794 self._deltatar.logger.warning(('Strange thing happened, file '
1795 '%s was listed in first index but deleted by another '
1796 'one. Path was ignored and untouched.') % path)
1797 return
1798 elif cur_path.startswith('snapshot://'):
1799 # this code path is reached when the file is unchanged
1800 # in the newest index and therefore of type 'list://'
1801 self.restore_file(d, data, path, l_no, dpath)
1802
1803 # now we restore parent_directory mtime
1804 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1805 return
1806
1807 # error code path is reached when:
1808 # a) we have more than two indexes (unsupported atm)
1809 # b) both indexes contain a list:// entry (logic error)
1810 # c) we have just one index and it also contains list://
4bda6f45 1811 self._deltatar.logger.warning(('Error restoring file %s from index, '
ea6d3c3e
ERE
1812 'snapshot not found in any index') % path)
1813
670f9934
ERE
1814 def find_path_in_index(self, data, upath):
1815 # NOTE: we sometimes restart the iterator because it can be walked
1816 # over completely multiple times, for example if one path is not
1817 # found in one index and we have to go to the next index.
7273719c
PG
1818 it = data['iterator']
1819 if it is None:
670f9934 1820 it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
be60ffd0 1821 d, l_no = it.__next__()
670f9934 1822 else:
670f9934
ERE
1823 d = data['last_itelement']
1824 l_no = data['last_lno']
1825
670f9934 1826 while True:
7273719c 1827 dpath = self._deltatar.unprefixed(d.get('path', ''))
670f9934
ERE
1828 if upath == dpath:
1829 data['last_itelement'] = d
1830 data['last_lno'] = l_no
1831 return d, l_no, dpath
1832
1833 up, dp = self._deltatar.compare_indexes(upath, dpath)
1834 # any time upath should have appeared before the current dpath, it
1835 # means upath is simply not in this index and we should stop
1836 if dp is None:
1837 data['last_itelement'] = d
1838 data['last_lno'] = l_no
1839 return None, 0, ''
1840
1841 try:
be60ffd0 1842 d, l_no = it.__next__()
670f9934
ERE
1843 except StopIteration:
1844 data['last_itelement'] = d
1845 data['last_lno'] = l_no
1846 return None, 0, ''
670f9934 1847
0501fe0a
ERE
1848 def restore_directories_permissions(self):
1849 '''
1850 Restore directory permissions when everything has been restored
1851 '''
42c04ead
ERE
1852 try:
1853 import grp, pwd
1854 except ImportError:
1855 grp = pwd = None
1856
0501fe0a
ERE
1857 self._directories.sort(key=operator.attrgetter('name'))
1858 self._directories.reverse()
0501fe0a
ERE
1859
1860 # Set correct owner, mtime and filemode on directories.
1861 for member in self._directories:
1862 dirpath = member.name
1863 try:
42c04ead
ERE
1864 os.chmod(dirpath, member.mode)
1865 os.utime(dirpath, (member.mtime, member.mtime))
253d4cdd 1866 if self.canchown:
42c04ead
ERE
1867 # We have to be root to do so.
1868 try:
1869 g = grp.getgrnam(member.gname)[2]
1870 except KeyError:
1871 g = member.gid
1872 try:
1873 u = pwd.getpwnam(member.uname)[2]
1874 except KeyError:
1875 u = member.uid
1876 try:
4e433e00 1877 if member.issym and hasattr(os, "lchown"):
42c04ead
ERE
1878 os.lchown(dirpath, u, g)
1879 else:
1880 os.chown(dirpath, u, g)
1881 except EnvironmentError:
1882 raise tarfile.ExtractError("could not change owner")
1883
be60ffd0 1884 except tarfile.ExtractError as e:
4bda6f45 1885 self._deltatar.logger.warning('tarfile: %s' % e)
0501fe0a 1886
df86af81 1887 @staticmethod
b7c47f38 1888 def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
ea6d3c3e
ERE
1889 '''
1890 Handles the new volumes
1891 '''
df86af81
ERE
1892 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1893 volume_number, guess_name=True)
ea6d3c3e
ERE
1894 volume_path = os.path.join(backup_path, volume_name)
1895
1896 # we convert relative paths into absolute because CWD is changed
1897 if not os.path.isabs(volume_path):
1898 volume_path = os.path.join(cwd, volume_path)
b7c47f38 1899 tarobj.open_volume(volume_path, encryption=encryption)
ea6d3c3e 1900
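    # Illustrative sketch (not part of the original code): tarfile invokes the
    # handler as handler(tarobj, base_name, volume_number), so the leading
    # context arguments are pre-bound with functools.partial, as done in
    # __init__ above:
    #
    #     handler = partial(RestoreHelper.new_volume_handler,
    #                       deltatar, cwd, is_full,
    #                       os.path.dirname(index), decryptor)
    #     tarobj.new_volume_handler = handler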
253d4cdd 1901 def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
ea6d3c3e
ERE
1902 '''
1903 Restores a snapshot of a file from a specific backup
1904 '''
ea6d3c3e 1905 op_type = file_data.get('type', -1)
24ddf0a2 1906 member = file_data.get('member', None)
9f9ae874 1907 ismember = bool(member)
24ddf0a2
ERE
1908
1909 # when member is set, then we can assume everything is right and we
1910 # just have to restore the path
a2a37de7 1911 if member is None:
24ddf0a2
ERE
1912 vol_no = file_data.get('volume', -1)
1913 # sanity check
1914 if not isinstance(vol_no, int) or vol_no < 0:
4bda6f45 1915 self._deltatar.logger.warning('unrecognized type to be restored: '
24ddf0a2
ERE
1916 '%s, line %d' % (op_type, l_no))
1917
1918 # set up the volume that needs to be read. only needed when member is
1919 # not set
a2a37de7 1920 if index_data['curr_vol_no'] != vol_no:
24ddf0a2
ERE
1921 index_data['curr_vol_no'] = vol_no
1922 backup_path = os.path.dirname(index_data['path'])
1923 vol_name = self._deltatar.volume_name_func(backup_path,
1924 index_data['is_full'], vol_no, guess_name=True)
1925 vol_path = os.path.join(backup_path, vol_name)
1926 if index_data['vol_fd']:
1927 index_data['vol_fd'].close()
be60ffd0 1928 index_data['vol_fd'] = open(vol_path, 'rb')
24ddf0a2
ERE
1929
1930 # force reopen of the tarobj because of new volume
1931 if index_data['tarobj']:
1932 index_data['tarobj'].close()
1933 index_data['tarobj'] = None
1934
1935 # seek tarfile if needed
1936 offset = file_data.get('offset', -1)
ea6d3c3e 1937 if index_data['tarobj']:
c52fd26b 1938 if self._disaster == tarfile.TOLERANCE_RESCUE:
24ddf0a2
ERE
1939 # force a seek and reopen
1940 index_data['tarobj'].close()
1941 index_data['tarobj'] = None
c52fd26b
PG
1942 else:
1943 try:
1944 member = index_data['tarobj'].__iter__().__next__()
1945 except tarfile.DecryptionError:
1946 pass
1947 except tarfile.CompressionError:
1948 pass
1949
1950 if not member or member.path != file_data['path']:
1951 # force a seek and reopen
1952 index_data['tarobj'].close()
1953 index_data['tarobj'] = None
1954
24ddf0a2
ERE
1955
1956 # open the tarfile if needed
1957 if not index_data['tarobj']:
1958 index_data['vol_fd'].seek(offset)
1959 index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
1960 fileobj=index_data['vol_fd'],
1961 format=tarfile.GNU_FORMAT,
d1c38f40 1962 concat='#' in self._deltatar.mode,
d5e1d60f 1963 encryption=index_data["decryptor"],
253d4cdd 1964 new_volume_handler=index_data['new_volume_handler'],
044585c6 1965 save_to_members=False,
04f4c7ab 1966 tolerance=self._disaster)
24ddf0a2 1967
be60ffd0 1968 member = index_data['tarobj'].__iter__().__next__()
ea6d3c3e 1969
253d4cdd
ERE
1970 member.path = unprefixed_path
1971 member.name = unprefixed_path
0501fe0a
ERE
1972
1973 if op_type == 'directory':
253d4cdd 1974 self.add_member_dir(member)
0501fe0a 1975 member = copy.copy(member)
be60ffd0 1976 member.mode = 0o0700
0501fe0a 1977
df86af81
ERE
1978 # if it's an existing directory, we don't need to recreate it; just
1979 # set the right permissions, mtime and that kind of stuff
1980 if os.path.exists(member.path):
1981 return
1982
9f9ae874 1983 if not ismember:
24ddf0a2
ERE
1984 # set current volume number in tarobj, otherwise the extraction of the
1985 # file might fail when trying to extract a multivolume member
1986 index_data['tarobj'].volume_number = index_data['curr_vol_no']
86a6e741 1987
9b13f5c4
PG
1988 def ignore_symlink (member, *_args):
1989 self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
786addd6 1990
ea6d3c3e 1991 # finally, restore the file
9b13f5c4 1992 index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)
253d4cdd
ERE
1993
1994 def add_member_dir(self, member):
1995 '''
1996 Add member dir to be restored at the end
1997 '''
4e433e00 1998 if not self.canchown:
253d4cdd
ERE
1999 self._directories.append(DirItem(name=member.name, mode=member.mode,
2000 mtime=member.mtime))
2001 else:
2002 self._directories.append(DirItem(name=member.name, mode=member.mode,
2003 mtime=member.mtime, gname=member.gname, uname=member.uname,
4e433e00 2004 uid=member.uid, gid=member.gid, issym=member.issym()))
253d4cdd
ERE
2005
2006class DirItem(object):
2007 def __init__(self, **kwargs):
be60ffd0 2008 for k, v in kwargs.items():
9f9ae874 2009 setattr(self, k, v)
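# Illustrative example (not in the original source): DirItem merely stores the
# given keyword arguments as attributes, e.g.
#
#     d = DirItem(name="some/dir", mode=0o700, mtime=1500000000)
#     d.name, d.mode, d.mtime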