reject bad index files with a meaningful error
[python-delta-tar] / deltatar / deltatar.py
6b2fa38f 1#!/usr/bin/env python3
0708a374 2
51797cd6 3# Copyright (C) 2013, 2014 Intra2net AG
0708a374
ERE
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; either version 3 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU Lesser General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see
17# <http://www.gnu.org/licenses/lgpl-3.0.html>
18
19# Author: Eduardo Robles Elvira <edulix@wadobo.com>
20
938c2d54
PG
21DELTATAR_HEADER_VERSION = 1
22DELTATAR_PARAMETER_VERSION = 1
3fdea6d4 23
0708a374
ERE
24import logging
25import datetime
6c678f3a 26import binascii
938c2d54 27import io
0501fe0a 28import operator
0708a374 29import os
0501fe0a 30import copy
82de3376 31import shutil
8a8fadda 32import re
e82f14f5
ERE
33import stat
34import json
0708a374
ERE
35from functools import partial
36
37from . import tarfile
2ae46844 38from . import crypto
0708a374 39
0708a374
ERE
40class NullHandler(logging.Handler):
41 def emit(self, record):
42 pass
24ddf0a2
ERE
43
44
0708a374
ERE
45logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
46
974408b5
ERE
47
48# match mode
49NO_MATCH = False
50MATCH = True
51PARENT_MATCH = 2
52
133d30da
PG
53# encryption direction
54CRYPTO_MODE_ENCRYPT = 0
55CRYPTO_MODE_DECRYPT = 1
56
13cc7dfc
PG
57# The canonical extension for encrypted backup files regardless of the actual
58# encryption parameters is “.pdtcrypt”. This is analogous to the encryption
59# header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
60# Since the introduction of the versioned header there is no longer any need
61# for encoding encryption parameters in the file extensions (“.aes128” and
62# suchlike).
63PDTCRYPT_EXTENSION = "pdtcrypt"
2cdd9faf
PG
64PDT_TYPE_ARCHIVE = 0
65PDT_TYPE_AUX = 1
13cc7dfc 66
9eccb1c2
PG
67AUXILIARY_FILE_INDEX = 0
68AUXILIARY_FILE_INFO = 1
69
0708a374
ERE
70class DeltaTar(object):
71 '''
72 Backup class used to create backups
73 '''
74
75 # list of files to exclude in the backup creation or restore operation. It
76 # can contain python regular expressions.
77 excluded_files = []
78
79 # list of files to include in the backup creation or restore operation. It
80 # can contain python regular expressions. If empty, all files in the source
81 # path will be backed up (when creating a backup) or all the files in the
a83fa4ed 82 # backup will be restored (when restoring a backup), but if included_files
0708a374
ERE
83 # is set then only the files included in the list will be processed.
84 included_files = []
85
86 # custom filter of files to be backed up (or restored). Unused and unset
87 # by default. The function receives a file path and must return a boolean.
88 filter_func = None
89
da26094a
ERE
90 # mode in which the delta will be created (when creating a backup) or
91 # opened (when restoring). Accepts the same modes as the tarfile library.
92 mode = ""
0708a374
ERE
93
94 # used together with aes modes to encrypt and decrypt backups.
95 password = None
1f3fd7b0
PG
96 crypto_key = None
97 nacl = None
0708a374 98
dbee011c
PG
99 # parameter version to use when encrypting; note that this has no effect
100 # on decryption since the required settings are determined from the headers
54f909ca 101 crypto_version = DELTATAR_HEADER_VERSION
dbee011c
PG
102 crypto_paramversion = None
103
133d30da 104 # when encrypting or decrypting, these hold crypto handlers; created before
2ae46844 105 # establishing the Tarfile stream iff a password is supplied.
133d30da
PG
106 encryptor = None
107 decryptor = None
2ae46844 108
0708a374
ERE
109 # python logger object.
110 logger = None
111
3a7e1a50
ERE
112 # specifies the index mode in the same format as @param mode, but without
113 # the ':', '|' or '#' at the beginning. It doesn't make sense to specify
2ae46844 114 # that the index is encrypted if no password is given in the constructor.
3a7e1a50 115 index_mode = None
0708a374
ERE
116
117 # current time for this backup. Used for file names and file creation checks
118 current_time = None
119
9eae9a1f
ERE
120 # extra data to be included in the header of the index file when creating a
121 # backup
122 extra_data = dict()
123
0708a374
ERE
124 # valid tarfile modes and their corresponding default file extension
125 __file_extensions_dict = {
da26094a
ERE
126 '': '',
127 ':': '',
128 ':gz': '.gz',
129 ':bz2': '.bz2',
130 '|': '',
131 '|gz': '.gz',
132 '|bz2': '.bz2',
133 '#gz': '.gz',
6e99d23a
PG
134 '#gz.pdtcrypt': '.gz',
135 '#pdtcrypt': '',
d1c38f40 136 '#': '',
0708a374
ERE
137 }
138
3a7e1a50
ERE
139 # valid index modes and their corresponding default file extension
140 __index_extensions_dict = {
141 '': '',
142 'gz': '.gz',
143 'bz2': '.bz2',
6e99d23a
PG
144 'gz.pdtcrypt': '.gz',
145 'pdtcrypt': '',
3a7e1a50
ERE
146 }
147
8adbe50d
ERE
148 # valid path prefixes
149 __path_prefix_list = [
150 u'snapshot://',
151 u'list://',
152 u'delete://'
153 ]
154
0708a374 155 def __init__(self, excluded_files=[], included_files=[],
da26094a 156 filter_func=None, mode="", password=None,
1f3fd7b0 157 crypto_key=None, nacl=None,
54f909ca 158 crypto_version=DELTATAR_HEADER_VERSION,
dbee011c 159 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
3a7e1a50 160 logger=None, index_mode=None, index_name_func=None,
0708a374
ERE
161 volume_name_func=None):
162 '''
163 Constructor. Configures the diff engine.
164
165 Parameters:
166 - excluded_files: list of files to exclude in the backup creation or
167 restore operation. It can contain python regular expressions.
168
169 - included_files: list of files to include in the backup creation or
170 restore operation. It can contain python regular expressions. If
171 empty, all files in the source path will be backed up (when creating a
172 backup) or all the files in the backup will be restored (when
a83fa4ed 173 restoring a backup), but if included_files is set then only the files
0708a374
ERE
174 included in the list will be processed.
175
176 - filter_func: custom filter of files to be backed up (or restored).
177 Unused and unset by default. The function receives a file path and
178 must return a boolean.
179
180 - mode: mode in which the delta will be created (when creating a backup)
181 or opened (when restoring). Accepts the same modes as the tarfile
182 library. Valid modes are:
183
da26094a
ERE
184 '' open uncompressed
185 ':' open uncompressed
186 ':gz' open with gzip compression
187 ':bz2' open with bzip2 compression
188 '|' open an uncompressed stream of tar blocks
189 '|gz' open a gzip compressed stream of tar blocks
190 '|bz2' open a bzip2 compressed stream of tar blocks
191 '#gz' open a stream of gzip compressed tar blocks
0708a374 192
1f3fd7b0
PG
193 - crypto_key: used to encrypt and decrypt backups. Encryption will
194 be enabled automatically if a key is supplied. Requires a salt to be
195 passed as well.
196
197 - nacl: salt that was used to derive the encryption key for embedding
198 in the PDTCRYPT header. Not needed when decrypting or when
199 encrypting with a password.
200
6e99d23a
PG
201 - password: used to encrypt and decrypt backups. Encryption will be
202 enabled automatically if a password is supplied.
0708a374 203
54f909ca
PG
204 - crypto_version: version of the format, determining the kind of PDT
205 object header.
206
dbee011c
PG
207 - crypto_paramversion: optionally request encryption conforming to
208 a specific parameter version. Defaults to the standard PDT value
209 which as of 2017 is the only one available.
210
0708a374
ERE
211 - logger: python logger object. Optional.
212
3a7e1a50 213 - index_mode: specifies the index mode in the same format as @param
6e99d23a
PG
214 mode, but without the ':', '|' or '#' at the beginning. If encryption
215 is requested it will extend to the auxiliary (index, info) files as
216 well. This is an optional parameter that will automatically mimic
217 @param mode by default if not provided. Valid modes are:
3a7e1a50
ERE
218
219 '' open uncompressed
220 'gz' open with gzip compression
221 'bz2' open with bzip2 compression
0708a374
ERE
222
223 - index_name_func: function that sets a custom name for the index file.
2cc6e32b
PG
224 This function receives a flag to indicate whether the name will be
225 used for a full or diff backup. The backup path will be prepended to
226 its return value.
0708a374
ERE
227
228 - volume_name_func: function that defines the name of tar volumes. It
229 receives the backup_path, whether it's a full backup, and the volume number,
230 and must return the name of the corresponding volume. Optional,
231 DeltaTar has default names for tar volumes.
232 '''
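# A minimal usage sketch (paths and password are hypothetical); '#gz'
# selects gzip-compressed concat mode, which supports encryption and
# index-based restores:
#
#   dtar = DeltaTar(mode='#gz', password='example-password')
#   dtar.create_full_backup('/srv/data', '/srv/backups/full')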
233
da26094a 234 if mode not in self.__file_extensions_dict:
8a54d5dd
PG
235 raise Exception('Unrecognized extension mode=[%s] requested for files'
236 % str(mode))
0708a374
ERE
237
238 self.excluded_files = excluded_files
239 self.included_files = included_files
240 self.filter_func = filter_func
241 self.logger = logging.getLogger('deltatar.DeltaTar')
242 if logger:
243 self.logger.addHandler(logger)
244 self.mode = mode
2ae46844 245
1f3fd7b0
PG
246 if crypto_key is not None:
247 self.crypto_key = crypto_key
248 self.nacl = nacl # encryption only
249
2ae46844
PG
250 if password is not None:
251 self.password = password
3a7e1a50 252
54f909ca
PG
253 if crypto_version is not None:
254 self.crypto_version = crypto_version
255
dbee011c
PG
256 if crypto_paramversion is not None:
257 self.crypto_paramversion = crypto_paramversion
258
3a7e1a50
ERE
259 # generate index_mode
260 if index_mode is None:
261 index_mode = ''
6e99d23a 262 if 'gz' in mode:
3a7e1a50
ERE
263 index_mode = "gz"
264 elif 'bz2' in mode:
265 index_mode = "bz2"
266 elif mode not in self.__index_extensions_dict:
8a54d5dd
PG
267 raise Exception('Unrecognized extension mode=[%s] requested for index'
268 % str(mode))
3a7e1a50
ERE
269
270 self.index_mode = index_mode
0708a374
ERE
271 self.current_time = datetime.datetime.now()
272
273 if index_name_func is not None:
274 self.index_name_func = index_name_func
275
276 if volume_name_func is not None:
277 self.volume_name_func = volume_name_func
278
e54cfec5 279 def pick_extension(self, kind, mode=None):
2cdd9faf
PG
280 """
281 Choose the extension depending on a) the kind of file given, b) the
282 processing mode, and c) the current encryption settings.
283 """
284 ret = ""
285 if kind == PDT_TYPE_ARCHIVE:
286 ret += ".tar"
e54cfec5
PG
287 if mode is None:
288 mode = self.__index_extensions_dict [self.index_mode]
2cdd9faf 289 ret += mode
a83fa4ed 290 if self.crypto_key is not None or self.password is not None:
2cdd9faf
PG
291 ret += "." + PDTCRYPT_EXTENSION
292 return ret
293
f0287fb7 294 def index_name_func(self, is_full): # pylint: disable=method-hidden
0708a374 295 '''
2cc6e32b
PG
296 Callback for setting a custom name for the index file. Depending on
297 whether *is_full* is set, it will create a suitable name for a full
298 or a diff backup.
0708a374
ERE
299 '''
300 prefix = "bfull" if is_full else "bdiff"
f7940c31 301 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf
PG
302 extension = self.pick_extension \
303 (PDT_TYPE_AUX,
304 self.__index_extensions_dict [self.index_mode])
0708a374 305
da26094a 306 return "%s-%s.index%s" % (prefix, date_str, extension)
0708a374 307
f0287fb7
CH
308 def volume_name_func(self, backup_path, # pylint: disable=method-hidden
309 is_full, volume_number,
310 guess_name=False):
0708a374
ERE
311 '''
312 function that defines the name of tar volumes. It receives the
313 backup_path, whether it's a full backup, and the volume number, and must
314 return the name of the corresponding volume. Optional, DeltaTar has default
315 names for tar volumes.
df86af81
ERE
316
317 If guess_name is activated, the file is intended to be located rather
318 than created, so the date portion of the name will be guessed.
0708a374
ERE
319 '''
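# Example of the generated names (the date part comes from current_time,
# shown here with a hypothetical timestamp): the first volume of a full
# backup is named like 'bfull-2014-02-14-1030-001.tar.gz', the second
# volume of a diff backup like 'bdiff-2014-02-14-1030-002.tar.gz'.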
320 prefix = "bfull" if is_full else "bdiff"
2cdd9faf
PG
321 extension = self.pick_extension \
322 (PDT_TYPE_ARCHIVE,
323 self.__file_extensions_dict [self.mode])
0708a374 324
df86af81 325 if not guess_name:
f7940c31 326 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf 327 return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
df86af81
ERE
328 else:
329 prefix = prefix + "-"
90b75470 330 postfix = "-%03d%s" % (volume_number + 1, extension)
86a6e741
ERE
331 for f in os.listdir(backup_path):
332 if f.startswith(prefix) and f.endswith(postfix):
333 return f
df86af81
ERE
334 raise Exception("volume not found")
335
0708a374 336
974408b5 337 def filter_path(self, path, source_path="", is_dir=None):
8a8fadda
ERE
338 '''
339 Filters a path, given the source_path, using the filtering properties
340 set in the constructor.
341 The filtering order is:
342 1. included_files (if any)
343 2. excluded_files
344 3. filter_func (which must return whether the file is accepted or not)
345 '''
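# Sketch of the resulting semantics, assuming the hypothetical settings
# included_files=['data'] and excluded_files=['data/tmp']:
#
#   filter_path('/src/data/a.txt', '/src')        -> MATCH
#   filter_path('/src/data/tmp', '/src')          -> NO_MATCH  (excluded)
#   filter_path('/src/other.txt', '/src')         -> NO_MATCH  (not included)
#   with included_files=['data/sub']:
#   filter_path('/src/data', '/src', is_dir=True) -> PARENT_MATCH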
75059f3c 346
c1af2184 347 if len(source_path) > 0:
75059f3c
CH
348 # normalize source_path so it ends with exactly one os.sep
349 source_path = source_path.rstrip(os.sep) + os.sep
8a8fadda
ERE
350 path = path[len(source_path):]
351
352 # 1. filter included_files
974408b5 353 match = MATCH
8a8fadda 354 if len(self.included_files) > 0:
974408b5 355 match = NO_MATCH
8a8fadda
ERE
356 for i in self.included_files:
357 # it can be either a regexp or a string
be60ffd0 358 if isinstance(i, str):
8a8fadda
ERE
359 # if the string matches, then continue
360 if i == path:
974408b5 361 match = MATCH
c1af2184 362 break
8a8fadda
ERE
363
364 # if the string ends with / it's a directory, and if the
7b07645e 365 # path is contained in it, it is included
c1af2184 366 if i.endswith('/') and path.startswith(i):
974408b5 367 match = MATCH
c1af2184 368 break
8a8fadda
ERE
369
370 # if the string doesn't end with /, add it and do the same
371 # check
c1af2184 372 elif path.startswith(i + '/'):
974408b5 373 match = MATCH
c1af2184 374 break
8a8fadda 375
974408b5
ERE
376 # check for PARENT_MATCH
377 if is_dir:
378 dir_path = path
379 if not dir_path.endswith('/'):
380 dir_path += '/'
381
382 if i.startswith(dir_path):
383 match = PARENT_MATCH
384
8a8fadda
ERE
385 # if it's a reg exp, then we just check if it matches
386 elif isinstance(i, re._pattern_type):
c1af2184 387 if i.match(path):
974408b5 388 match = MATCH
c1af2184 389 break
8a8fadda 390 else:
4bda6f45 391 self.logger.warning('Invalid pattern in included_files: %s' % str(i))
8a8fadda 392
974408b5
ERE
393 if match == NO_MATCH:
394 return NO_MATCH
c1af2184 395
974408b5
ERE
396 # when a directory is in PARENT_MATCH, it doesn't matter if it's
397 # excluded. Its subfiles will be excluded, but the directory itself
398 # won't
399 if match != PARENT_MATCH:
8a8fadda
ERE
400 for e in self.excluded_files:
401 # it can be either a regexp or a string
be60ffd0 402 if isinstance(e, str):
8a8fadda 403 # if the string matches, then exclude
c1af2184 404 if e == path:
974408b5 405 return NO_MATCH
8a8fadda
ERE
406
407 # if the string ends with / it's a directory, and if the
408 # path starts with the directory, then exclude
c1af2184 409 if e.endswith('/') and path.startswith(e):
974408b5 410 return NO_MATCH
8a8fadda
ERE
411
412 # if the string doesn't end with /, do the same check with
413 # the slash added
c1af2184 414 elif path.startswith(e + '/'):
974408b5 415 return NO_MATCH
8a8fadda
ERE
416
417 # if it's a reg exp, then we just check if it matches
c1af2184
ERE
418 elif isinstance(e, re._pattern_type):
419 if e.match(path):
974408b5 420 return NO_MATCH
8a8fadda 421 else:
4bda6f45 422 self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
8a8fadda
ERE
423
424 if self.filter_func:
425 return self.filter_func(path)
426
974408b5 427 return match
8a8fadda 428
283fbd5e 429 def _recursive_walk_dir(self, source_path, keep_base_dir=False):
0708a374
ERE
430 '''
431 Walk a directory recursively, yielding each file/directory
0708a374
ERE
432 '''
433
283fbd5e 434 source_path = source_path.rstrip(os.sep)
0708a374 435
283fbd5e 436 if keep_base_dir:
adf7dac4 437 beginning_size = 0
283fbd5e
CH
438 else:
439 beginning_size = len(source_path) + 1 # +1 for os.sep
440
441 queue = [source_path]
442
d07c8065 443 while queue:
df86af81 444 cur_path = queue.pop(0)
0708a374 445
d86735e4
ERE
446 # it might have been removed in the meantime
447 if not os.path.exists(cur_path):
448 continue
449
7dec665c
CH
450 for filename in sorted(os.listdir(cur_path)):
451 child = os.path.join(cur_path, filename)
d07c8065
ERE
452 is_dir = os.path.isdir(child)
453 status = self.filter_path(child, source_path, is_dir)
7dec665c
CH
454 if status == NO_MATCH:
455 continue
456 if not os.access(child, os.R_OK):
4bda6f45 457 self.logger.warning('Error accessing possibly locked file %s' % child)
7dec665c 458 continue
8a8fadda 459
d07c8065 460 if status == MATCH:
adf7dac4 461 yield child[beginning_size:]
0708a374 462
d07c8065
ERE
463 if is_dir and (status == MATCH or status == PARENT_MATCH):
464 queue.append(child)
0708a374 465
e82f14f5
ERE
466 def _stat_dict(self, path):
467 '''
468 Returns a dict with the stat data used to compare files
469 '''
470 stinfo = os.stat(path)
471 mode = stinfo.st_mode
472
473 ptype = None
474 if stat.S_ISDIR(mode):
d07c8065 475 ptype = u'directory'
e82f14f5 476 elif stat.S_ISREG(mode):
d07c8065 477 ptype = u'file'
e82f14f5 478 elif stat.S_ISLNK(mode):
d07c8065 479 ptype = u'link'
e82f14f5
ERE
480
481 return {
d07c8065 482 u'type': ptype,
be60ffd0 483 u'path': path,
d07c8065 484 u'mode': mode,
0501fe0a
ERE
485 u'mtime': int(stinfo.st_mtime),
486 u'ctime': int(stinfo.st_ctime),
d07c8065
ERE
487 u'uid': stinfo.st_uid,
488 u'gid': stinfo.st_gid,
489 u'inode': stinfo.st_ino,
490 u'size': stinfo.st_size
e82f14f5
ERE
491 }
492
df99a044 493 def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
d07c8065
ERE
494 '''
495 Return whether the dicts are equal in the stat keys
496 '''
fc8fdcbc 497 keys = [u'type', u'mode', u'size', u'mtime',
d041935c 498 # not restored: u'inode', u'ctime'
df99a044 499 ]
8adbe50d 500
fc8fdcbc 501 # only if the user is root do we also check gid/uid; otherwise skip the check,
d041935c 502 # because tarfile can only chown when running as superuser
50d70ca9
PG
503 #
504 # also, skip the check in rpmbuild since the sources end up with the
505 # uid:gid of the packager while the extracted files are 0:0.
506 if hasattr(os, "geteuid") and os.geteuid() == 0 \
507 and os.getenv ("RPMBUILD_OPTIONS") is None:
fc8fdcbc
ERE
508 keys.append('gid')
509 keys.append('uid')
510
ea6d3c3e 511 if (not d1 and d2 != None) or (d1 != None and not d2):
8adbe50d
ERE
512 return False
513
cbac9f0b
ERE
514 if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
515 return False
8adbe50d 516
fc8fdcbc
ERE
517 type = d1.get('type', '')
518
d07c8065 519 for key in keys:
fc8fdcbc
ERE
520 # size doesn't matter for directories
521 if type == 'directory' and key == 'size':
522 continue
d07c8065
ERE
523 if d1.get(key, -1) != d2.get(key, -2):
524 return False
525 return True
526
df99a044 527 def prefixed(self, path, listsnapshot_equal=False):
8adbe50d
ERE
528 '''
529 if a path is not prefixed, return it prefixed
530 '''
531 for prefix in self.__path_prefix_list:
532 if path.startswith(prefix):
df99a044
ERE
533 if listsnapshot_equal and prefix == u'list://':
534 return u'snapshot://' + path[len(prefix):]
8adbe50d
ERE
535 return path
536 return u'snapshot://' + path
537
538 def unprefixed(self, path):
539 '''
540 remove a path prefix if any
541 '''
542 for prefix in self.__path_prefix_list:
543 if path.startswith(prefix):
544 return path[len(prefix):]
545 return path
546
133d30da
PG
547
548 def initialize_encryption (self, mode):
549 password = self.password
1f3fd7b0
PG
550 key = self.crypto_key
551 nacl = self.nacl
133d30da 552
1f3fd7b0 553 if key is None and password is None:
133d30da
PG
554 return
555 if mode == CRYPTO_MODE_ENCRYPT:
1f3fd7b0
PG
556 return crypto.Encrypt (password=password,
557 key=key,
558 nacl=nacl,
54f909ca 559 version=self.crypto_version,
774ca538 560 paramversion=self.crypto_paramversion)
133d30da 561 if mode == CRYPTO_MODE_DECRYPT:
1f3fd7b0 562 return crypto.Decrypt (password=password, key=key)
133d30da
PG
563
564 raise Exception ("invalid encryption mode [%r]" % mode)
565
566
9eccb1c2 567 def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
3a7e1a50 568 '''
9eccb1c2
PG
569 Given the specified configuration, opens a file for reading or writing,
570 inheriting the encryption and compression settings from the backup.
571 Returns a file object ready to use.
3fdea6d4 572
c8c72fe1
PG
573 :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
574 respectively).
575 :type mode: str
774ca538
PG
576 :param kind: Role of the file, see AUXILIARY_FILE_* constants.
577 Both the info file and the index file have a globally
578 unique, constant counter value.
3fdea6d4 579 :type kind: int
3a7e1a50 580 '''
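# Typical calls, mirroring the usage elsewhere in this class:
#
#   index_sink = self.open_auxiliary_file(index_path, 'w')  # write an index
#   f = self.open_auxiliary_file(index_path, 'r')           # read it back
#
# The kind argument only selects the fixed IV counter used when encrypting.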
3a7e1a50
ERE
581 if self.index_mode.startswith('gz'):
582 comptype = 'gz'
583 elif self.index_mode.startswith('bz2'):
584 comptype = 'bz2'
585 else:
586 comptype = 'tar'
587
133d30da 588 crypto_ctx = None
6de9444a 589 enccounter = None
133d30da 590 if mode == "w":
774ca538 591 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 592 elif mode == "r":
774ca538 593 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
133d30da 594
3031b7ae
PG
595 if crypto_ctx is not None:
596 if kind == AUXILIARY_FILE_INFO:
597 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
598 elif kind == AUXILIARY_FILE_INDEX:
599 enccounter = crypto.AES_GCM_IV_CNT_INDEX
600 else:
601 raise Exception ("invalid kind of aux file %r" % kind)
602
c8c72fe1 603 sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
3fdea6d4 604 bufsize=tarfile.RECORDSIZE, fileobj=None,
6de9444a 605 encryption=crypto_ctx, enccounter=enccounter)
c8c72fe1
PG
606
607 return sink
608
3a7e1a50 609
0708a374 610 def create_full_backup(self, source_path, backup_path,
d4a05db6 611 max_volume_size=None, extra_data=dict()):
0708a374
ERE
612 '''
613 Creates a full backup.
614
615 Parameters:
616 - source_path: source path to the directory to back up.
617 - backup_path: path where the backup will be stored. Backup path will
618 be created if not existent.
d5361dac
ERE
619 - max_volume_size: maximum volume size in megabytes. Used to split the
620 backup in volumes. Optional (won't split in volumes by default).
9eae9a1f
ERE
621 - extra_data: a json-serializable dictionary with information that you
622 want to be included in the header of the index file
0708a374
ERE
623 '''
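# A minimal call sketch with hypothetical paths; volumes are split at
# 100 MB here:
#
#   dtar.create_full_backup(source_path='/srv/data',
#                           backup_path='/srv/backups/full',
#                           max_volume_size=100)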
624 # check input
be60ffd0 625 if not isinstance(source_path, str):
0708a374
ERE
626 raise Exception('Source path must be a string')
627
be60ffd0 628 if not isinstance(backup_path, str):
0708a374
ERE
629 raise Exception('Backup path must be a string')
630
631 if not os.path.exists(source_path) or not os.path.isdir(source_path):
632 raise Exception('Source path "%s" does not exist or is not a '\
633 'directory' % source_path)
634
d07c8065
ERE
635 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
636 max_volume_size < 1):
637 raise Exception('max_volume_size must be a positive integer')
d5361dac
ERE
638 if max_volume_size != None:
639 max_volume_size = max_volume_size*1024*1024
640
9eae9a1f
ERE
641 if not isinstance(extra_data, dict):
642 raise Exception('extra_data must be a dictionary')
643
644 try:
645 extra_data_str = json.dumps(extra_data)
646 except:
647 raise Exception('extra_data is not json-serializable')
648
0708a374
ERE
649 if not os.access(source_path, os.R_OK):
650 raise Exception('Source path "%s" is not readable' % source_path)
651
652 # try to create backup path if needed
653 if not os.path.exists(backup_path):
d4a05db6 654 os.makedirs(backup_path)
0708a374
ERE
655
656 if not os.access(backup_path, os.W_OK):
657 raise Exception('Backup path "%s" is not writeable' % backup_path)
658
659 if source_path.endswith('/'):
660 source_path = source_path[:-1]
661
662 if backup_path.endswith('/'):
663 backup_path = backup_path[:-1]
664
665 # update current time
666 self.current_time = datetime.datetime.now()
667
668 if self.mode not in self.__file_extensions_dict:
669 raise Exception('Unrecognized extension')
670
2ae46844 671 # setup for encrypting payload
774ca538
PG
672 if self.encryptor is None:
673 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
2ae46844 674
0708a374 675 # some initialization
11684b1d 676 self.vol_no = 0
0708a374
ERE
677
678 # generate the first volume name
679 vol_name = self.volume_name_func(backup_path, True, 0)
680 tarfile_path = os.path.join(backup_path, vol_name)
681
774ca538
PG
682 # init index
683 index_name = self.index_name_func(True)
684 index_path = os.path.join(backup_path, index_name)
685 index_sink = self.open_auxiliary_file(index_path, 'w')
e82f14f5 686
d5361dac
ERE
687 cwd = os.getcwd()
688
b7c47f38 689 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
0708a374
ERE
690 '''
691 Handles the new volumes
692 '''
d5361dac
ERE
693 volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
694 volume_path = os.path.join(backup_path, volume_name)
11684b1d 695 deltarobj.vol_no = volume_number
d5361dac
ERE
696
697 # we convert relative paths into absolute because CWD is changed
698 if not os.path.isabs(volume_path):
699 volume_path = os.path.join(cwd, volume_path)
11684b1d 700
8e019196
ERE
701 if tarobj.fileobj is not None:
702 tarobj.fileobj.close()
703
b008f989
ERE
704 deltarobj.logger.debug("opening volume %s" % volume_path)
705
b7c47f38 706 tarobj.open_volume(volume_path, encryption=encryption)
d5361dac
ERE
707
708 # wraps some args from context into the handler
133d30da 709 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
0708a374 710
774ca538 711 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
6c678f3a 712
be60ffd0 713 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
6c678f3a 714 # calculate checksum and write into the stream
c2ffe2ec 715 crc = binascii.crc32(s) & 0xFFFFffff
774ca538 716 index_sink.write(s)
e82f14f5 717
0708a374
ERE
718 # start creating the tarfile
719 tarobj = tarfile.TarFile.open(tarfile_path,
da26094a 720 mode='w' + self.mode,
0708a374 721 format=tarfile.GNU_FORMAT,
d1c38f40 722 concat='#' in self.mode,
133d30da 723 encryption=self.encryptor,
0708a374 724 max_volume_size=max_volume_size,
ea625b04 725 new_volume_handler=new_volume_handler,
e2b59b34
ERE
726 save_to_members=False,
727 dereference=True)
e5c6ca04 728 os.chdir(source_path)
55b8686d
ERE
729
730 # for each file to be in the backup, do:
e82f14f5 731 for path in self._recursive_walk_dir('.'):
55b8686d 732 # calculate stat dict for current file
253d4cdd
ERE
733 statd = self._stat_dict(path)
734 statd['path'] = u'snapshot://' + statd['path']
735 statd['volume'] = self.vol_no
55b8686d
ERE
736
737 # backup file
253d4cdd 738 tarobj.add(path, arcname = statd['path'], recursive=False)
11684b1d 739
55b8686d 740 # retrieve file offset
253d4cdd 741 statd['offset'] = tarobj.get_last_member_offset()
b008f989 742 self.logger.debug("backup %s" % statd['path'])
6c678f3a 743
d041935c 744 # store the stat dict in the index
be60ffd0 745 s = bytes(json.dumps(statd) + '\n', 'UTF-8')
6c678f3a 746 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 747 index_sink.write(s)
e82f14f5 748
be60ffd0 749 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
6c678f3a 750 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 751 index_sink.write(s)
be60ffd0 752 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
774ca538
PG
753 index_sink.write(s)
754
e5c6ca04 755 os.chdir(cwd)
0708a374 756 tarobj.close()
c8c72fe1 757 index_sink.close (close_fileobj=True)
938c2d54 758
0708a374 759 def create_diff_backup(self, source_path, backup_path, previous_index_path,
d4a05db6 760 max_volume_size=None, extra_data=dict()):
0708a374
ERE
761 '''
762 Creates a differential backup.
763
764 Parameters:
765 - source_path: source path to the directory to back up.
766 - backup_path: path where the backup will be stored. Backup path will
767 be created if not existent.
768 - previous_index_path: index of the previous backup, needed to know
769 which files changed since then.
770 - max_volume_size: maximum volume size in megabytes (MB). Used to split
771 the backup in volumes. Optional (won't split in volumes by default).
3a7e1a50
ERE
772
773 NOTE: the previous index is assumed to follow exactly the same format as
774 the index_mode configured in the constructor.
0708a374 775 '''
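# A minimal call sketch with hypothetical paths; previous_index_path points
# at the index written by the preceding (full or diff) backup:
#
#   dtar.create_diff_backup('/srv/data', '/srv/backups/diff1',
#                           '/srv/backups/full/bfull-2014-02-14-1030.index.gz')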
d07c8065 776 # check/sanitize input
be60ffd0 777 if not isinstance(source_path, str):
d07c8065
ERE
778 raise Exception('Source path must be a string')
779
be60ffd0 780 if not isinstance(backup_path, str):
d07c8065
ERE
781 raise Exception('Backup path must be a string')
782
783 if not os.path.exists(source_path) or not os.path.isdir(source_path):
784 raise Exception('Source path "%s" does not exist or is not a '\
785 'directory' % source_path)
786
9eae9a1f
ERE
787 if not isinstance(extra_data, dict):
788 raise Exception('extra_data must be a dictionary')
789
790 try:
791 extra_data_str = json.dumps(extra_data)
792 except:
793 raise Exception('extra_data is not json-serializable')
794
d07c8065
ERE
795 if not os.access(source_path, os.R_OK):
796 raise Exception('Source path "%s" is not readable' % source_path)
797
798 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
799 max_volume_size < 1):
800 raise Exception('max_volume_size must be a positive integer')
801 if max_volume_size != None:
802 max_volume_size = max_volume_size*1024*1024
803
be60ffd0 804 if not isinstance(previous_index_path, str):
d07c8065
ERE
805 raise Exception('previous_index_path must be a string')
806
807 if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
808 raise Exception('Index path "%s" does not exist or is not a '\
809 'file' % previous_index_path)
810
811 if not os.access(previous_index_path, os.R_OK):
812 raise Exception('Index path "%s" is not readable' % previous_index_path)
813
814 # try to create backup path if needed
815 if not os.path.exists(backup_path):
d4a05db6 816 os.makedirs(backup_path)
d07c8065
ERE
817
818 if not os.access(backup_path, os.W_OK):
819 raise Exception('Backup path "%s" is not writeable' % backup_path)
820
821 if source_path.endswith('/'):
822 source_path = source_path[:-1]
823
824 if backup_path.endswith('/'):
825 backup_path = backup_path[:-1]
826
827 # update current time
828 self.current_time = datetime.datetime.now()
829
830 if self.mode not in self.__file_extensions_dict:
831 raise Exception('Unrecognized extension')
832
2ae46844 833 # setup for encrypting payload
774ca538
PG
834 if self.encryptor is None:
835 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 836
d07c8065
ERE
837 # some initialization
838 self.vol_no = 0
839
840 # generate the first volume name
df86af81
ERE
841 vol_name = self.volume_name_func(backup_path, is_full=False,
842 volume_number=0)
d07c8065
ERE
843 tarfile_path = os.path.join(backup_path, vol_name)
844
938c2d54 845 # init index
d07c8065
ERE
846 cwd = os.getcwd()
847
3031b7ae
PG
848 index_name = self.index_name_func(is_full=False)
849 index_path = os.path.join(backup_path, index_name)
850 index_sink = self.open_auxiliary_file(index_path, 'w')
851
d07c8065
ERE
852 def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
853 '''
854 Handles the new volumes
855 '''
df86af81
ERE
856 volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
857 volume_number=volume_number)
d07c8065
ERE
858 volume_path = os.path.join(backup_path, volume_name)
859 deltarobj.vol_no = volume_number
860
861 # we convert relative paths into absolute because CWD is changed
862 if not os.path.isabs(volume_path):
863 volume_path = os.path.join(cwd, volume_path)
864
f624ff3d 865 deltarobj.logger.debug("opening volume %s" % volume_path)
d07c8065
ERE
866 tarobj.open_volume(volume_path)
867
868 # wraps some args from context into the handler
869 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
870
3031b7ae 871 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
d07c8065 872
be60ffd0 873 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
d07c8065 874 # calculate checksum and write into the stream
c2ffe2ec 875 crc = binascii.crc32(s) & 0xFFFFffff
3031b7ae 876 index_sink.write(s)
d07c8065
ERE
877
878 # start creating the tarfile
879 tarobj = tarfile.TarFile.open(tarfile_path,
880 mode='w' + self.mode,
881 format=tarfile.GNU_FORMAT,
d1c38f40 882 concat='#' in self.mode,
133d30da 883 encryption=self.encryptor,
d07c8065 884 max_volume_size=max_volume_size,
ea625b04 885 new_volume_handler=new_volume_handler,
e2b59b34
ERE
886 save_to_members=False,
887 dereference=True)
d07c8065 888
aae127d0
ERE
889
890 # create the iterators, first the previous index iterator, then the
891 # source path directory iterator and collate and iterate them
892 if not os.path.isabs(previous_index_path):
893 previous_index_path = os.path.join(cwd, previous_index_path)
894 index_it = self.iterate_index_path(previous_index_path)
895
d07c8065 896 os.chdir(source_path)
aae127d0
ERE
897 dir_it = self._recursive_walk_dir('.')
898 dir_path_it = self.jsonize_path_iterator(dir_it)
d07c8065 899
df86af81
ERE
900 def pr(path):
901 if not path:
902 return "None"
903 else:
904 return path["path"]
8edb2e3c 905
d07c8065 906 # for each file to be in the backup, do:
df86af81 907 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
aae127d0
ERE
908 action = None
909 # if file is not in the index, it means it's a new file, so we have
910 # to take a snapshot
df86af81 911
aae127d0
ERE
912 if not ipath:
913 action = 'snapshot'
914 # if the file is not in the directory iterator, it means that it has
d041935c 915 # been deleted, so we need to mark it as such
aae127d0
ERE
916 elif not dpath:
917 action = 'delete'
918 # if the file is in both iterators, it means it might have either
919 # not changed (in which case we will just list it in our index but
920 # it will not be included in the tar file), or it might have
e8d95fe5 921 # changed, in which case we will snapshot it.
aae127d0
ERE
922 elif ipath and dpath:
923 if self._equal_stat_dicts(ipath, dpath):
924 action = 'list'
925 else:
926 action = 'snapshot'
927 # TODO: when creating chained backups (i.e. diffing from another
928 # diff), we will need to detect the type of action in the previous
929 # index, because if it was delete and dpath is None, we should
930 # discard the file
931
932 if action == 'snapshot':
933 # calculate stat dict for current file
934 stat = dpath.copy()
be60ffd0 935 stat['path'] = "snapshot://" + dpath['path']
aae127d0
ERE
936 stat['volume'] = self.vol_no
937
50f43227
ERE
938 self.logger.debug("[STORE] %s" % dpath['path'])
939
aae127d0 940 # backup file
8adbe50d 941 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
aae127d0
ERE
942
943 # retrieve file offset
944 stat['offset'] = tarobj.get_last_member_offset()
aae127d0 945 elif action == 'delete':
50f43227 946 path = self.unprefixed(ipath['path'])
aae127d0 947 stat = {
50f43227 948 u'path': u'delete://' + path,
aae127d0
ERE
949 u'type': ipath['type']
950 }
50f43227 951 self.logger.debug("[DELETE] %s" % path)
aae127d0
ERE
952
953 # mark it as deleted in the backup
42d39ca7 954 tarobj.add("/dev/null", arcname=stat['path'])
aae127d0
ERE
955 elif action == 'list':
956 stat = dpath.copy()
50f43227
ERE
957 path = self.unprefixed(ipath['path'])
958 stat['path'] = u'list://' + path
aae127d0 959 # unchanged files do not enter in the backup, only in the index
50f43227 960 self.logger.debug("[UNCHANGED] %s" % path)
80910564
TJ
961 else:
962 # should not happen
4bda6f45 963 self.logger.warning('unknown action in create_diff_backup: {0}'
80910564
TJ
964 ''.format(action))
965 stat = None
aae127d0 966
80910564
TJ
967 if stat:
968 # store the stat dict in the index
be60ffd0 969 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
aae127d0 970 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 971 index_sink.write(s)
aae127d0 972
be60ffd0 973 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
aae127d0 974 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 975 index_sink.write(s)
be60ffd0 976 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
3031b7ae 977 index_sink.write(s)
938c2d54 978
df86af81 979 index_it.release()
aae127d0
ERE
980 os.chdir(cwd)
981 tarobj.close()
938c2d54
PG
982 index_sink.close()
983
984
d07c8065 985 def iterate_index_path(self, index_path):
df86af81
ERE
986 '''
987 Returns an index iterator. Internally, it uses a classic iterator class.
988 We do that instead of just yielding so that the iterator object can have
989 an additional function to close the file descriptor that is opened in
990 the constructor.
991 '''
d07c8065 992
df86af81
ERE
993 class IndexPathIterator(object):
994 def __init__(self, delta_tar, index_path):
995 self.delta_tar = delta_tar
996 self.index_path = index_path
997 self.f = None
9eae9a1f 998 self.extra_data = dict()
df86af81 999 self.__enter__()
d07c8065 1000
df86af81
ERE
1001 def __iter__(self):
1002 return self
d07c8065 1003
df86af81
ERE
1004 def release(self):
1005 if self.f:
1006 self.f.close()
1007
1008 def __enter__(self):
1009 '''
1010 Allows this iterator to be used with the "with" statement
1011 '''
1012 if self.f is None:
9eccb1c2 1013 self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
df86af81
ERE
1014 # check index header
1015 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1016 if j.get("type", '') != 'python-delta-tar-index' or\
1017 j.get('version', -1) != 1:
1018 raise Exception("invalid index file format: %s" % json.dumps(j))
1019
9eae9a1f
ERE
1020 self.extra_data = j.get('extra_data', dict())
1021
df86af81
ERE
1022 # find BEGIN-FILE-LIST, ignore other headers
1023 while True:
1024 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1025 if j.get('type', '') == 'BEGIN-FILE-LIST':
1026 break
1027 return self
1028
1029 def __exit__(self, type, value, tb):
1030 '''
1031 Allows this iterator to be used with the "with" statement
1032 '''
ec57ce53
ERE
1033 if self.f:
1034 self.f.close()
df86af81 1035 self.f = None
d07c8065 1036
be60ffd0 1037 def __next__(self):
0349168a 1038 # read each file in the index and process it to do the restore
df86af81
ERE
1039 j = {}
1040 l_no = -1
1041 try:
1042 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
be60ffd0 1043 except Exception as e:
df86af81
ERE
1044 if self.f:
1045 self.f.close()
1046 raise e
d07c8065 1047
df86af81 1048 op_type = j.get('type', '')
d07c8065 1049
df86af81
ERE
1050 # when we detect the end of the list, break the loop
1051 if op_type == 'END-FILE-LIST':
1052 if self.f:
1053 self.f.close()
1054 raise StopIteration
1055
1056 # check input
1057 if op_type not in ['directory', 'file', 'link']:
4bda6f45 1058 self.delta_tar.logger.warning('unrecognized type to be '
df86af81
ERE
1059 'restored: %s, line %d' % (op_type, l_no))
1060 # iterate again
be60ffd0 1061 return self.__next__()
df86af81
ERE
1062
1063 return j, l_no
d07c8065 1064
df86af81 1065 return IndexPathIterator(self, index_path)
d07c8065 1066
26fdd428 1067 def iterate_tar_path(self, tar_path, new_volume_handler=None):
24ddf0a2
ERE
1068 '''
1069 Returns a tar iterator that iterates jsonized member items that contain
1070 an additional "member" field, used by RestoreHelper.
1071 '''
ec57ce53 1072 class TarPathIterator(object):
83a81852 1073 def __init__(self, delta_tar, tar_path, new_volume_handler=None):
24ddf0a2 1074 self.delta_tar = delta_tar
ec57ce53 1075 self.tar_path = tar_path
24ddf0a2 1076 self.tar_obj = None
6bca471c 1077 self.last_member = None
26fdd428 1078 self.new_volume_handler = new_volume_handler
24ddf0a2
ERE
1079 self.__enter__()
1080
1081 def __iter__(self):
1082 return self
1083
1084 def release(self):
1085 if self.tar_obj:
1086 self.tar_obj.close()
1087
1088 def __enter__(self):
1089 '''
1090 Allows this iterator to be used with the "with" statement
1091 '''
1092 if self.tar_obj is None:
d5e1d60f
PG
1093 decryptor = None
1094 if self.delta_tar.password is not None:
1f3fd7b0
PG
1095 decryptor = crypto.Decrypt \
1096 (password=self.delta_tar.password,
1097 key=self.delta_tar.crypto_key)
ec57ce53
ERE
1098 self.tar_obj = tarfile.TarFile.open(self.tar_path,
1099 mode='r' + self.delta_tar.mode,
1100 format=tarfile.GNU_FORMAT,
d1c38f40 1101 concat='#' in self.delta_tar.mode,
d5e1d60f 1102 encryption=decryptor,
83a81852 1103 new_volume_handler=self.new_volume_handler,
e2b59b34
ERE
1104 save_to_members=False,
1105 dereference=True)
24ddf0a2
ERE
1106 return self
1107
1108 def __exit__(self, type, value, tb):
1109 '''
1110 Allows this iterator to be used with the "with" statement
1111 '''
ec57ce53
ERE
1112 if self.tar_obj:
1113 self.tar_obj.close()
24ddf0a2
ERE
1114 self.tar_obj = None
1115
be60ffd0 1116 def __next__(self):
24ddf0a2
ERE
1117 '''
1118 Read each member and return it as a stat dict
1119 '''
be60ffd0 1120 tarinfo = self.tar_obj.__iter__().__next__()
8e019196
ERE
1121 # NOTE: here we compare if tarinfo.path is the same as before
1122 # instead of comparing the tarinfo object itself because the
1123 # object itself might change for multivol tarinfos
1124 if tarinfo is None or (self.last_member is not None and\
1125 self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
ec57ce53
ERE
1126 raise StopIteration
1127
6bca471c
ERE
1128 self.last_member = tarinfo
1129
24ddf0a2
ERE
1130 ptype = 'unknown'
1131 if tarinfo.isfile():
1132 ptype = 'file'
1133 elif tarinfo.isdir():
ab7e7465 1134 ptype = 'directory'
24ddf0a2
ERE
1135 elif tarinfo.islnk() or tarinfo.issym():
1136 ptype = 'link'
1137
1138 return {
1139 u'type': ptype,
1140 u'path': tarinfo.path,
1141 u'mode': tarinfo.mode,
1142 u'mtime': tarinfo.mtime,
1143 u'ctime': -1, # cannot restore
1144 u'uid': tarinfo.uid,
1145 u'gid': tarinfo.gid,
1146 u'inode': -1, # cannot restore
1147 u'size': tarinfo.size,
1148 u'member': tarinfo
ec57ce53
ERE
1149 }, 0
1150
26fdd428 1151 return TarPathIterator(self, tar_path, new_volume_handler)
24ddf0a2 1152
df99a044 1153 def jsonize_path_iterator(self, iter, strip=0):
d07c8065
ERE
1154 '''
1155 converts the paths yielded by an iterator into (stat dict, line number) tuples.
df99a044
ERE
1156
1157 strip: Strip the smallest prefix containing num leading slashes from
1158 the file path.
d07c8065
ERE
1159 '''
1160 while True:
1161 try:
be60ffd0 1162 path = iter.__next__()
df99a044 1163 if strip == 0:
4ac6d333 1164 yield self._stat_dict(path), 0
df99a044
ERE
1165 else:
1166 st = self._stat_dict(path)
1167 st['path'] = "/".join(path.split("/")[strip:])
4ac6d333 1168 yield st, 0
d07c8065
ERE
1169 except StopIteration:
1170 break
1171
1172 def collate_iterators(self, it1, it2):
1173 '''
1174 Collate two iterators, so that it returns pairs of the items of each
1175 iterator (if the items are the same), or (None, elem2) or (elem1, None)
1176 when there's no match for the items in the other iterator.
1177
1178 It assumes that the items in both lists are ordered in the same way.
1179 '''
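# For example, collating ordered path iterators yielding [a, b] and [b, c]
# produces (a, None, l_no), (b, b, l_no), (None, c, l_no): unmatched items
# appear with None on the other side.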
ea6d3c3e 1180 l_no = 0
d07c8065
ERE
1181 elem1, elem2 = None, None
1182 while True:
1183 if not elem1:
1184 try:
be60ffd0 1185 elem1, l_no = it1.__next__()
d07c8065
ERE
1186 except StopIteration:
1187 if elem2:
ea6d3c3e 1188 yield (None, elem2, l_no)
d07c8065 1189 for elem2 in it2:
ea6d3c3e
ERE
1190 if isinstance(elem2, tuple):
1191 elem2 = elem2[0]
1192 yield (None, elem2, l_no)
d07c8065 1193 break
d07c8065
ERE
1194 if not elem2:
1195 try:
be60ffd0 1196 elem2 = it2.__next__()
d07c8065
ERE
1197 if isinstance(elem2, tuple):
1198 elem2 = elem2[0]
1199 except StopIteration:
1200 if elem1:
ea6d3c3e 1201 yield (elem1, None, l_no)
df99a044 1202 for elem1, l_no in it1:
ea6d3c3e 1203 yield (elem1, None, l_no)
d07c8065 1204 break
670f9934
ERE
1205
1206 index1 = self.unprefixed(elem1['path'])
1207 index2 = self.unprefixed(elem2['path'])
1208 i1, i2 = self.compare_indexes(index1, index2)
1209
1210 yield1 = yield2 = None
1211 if i1 is not None:
1212 yield1 = elem1
1213 elem1 = None
1214 if i2 is not None:
1215 yield2 = elem2
1216 elem2 = None
1217 yield (yield1, yield2, l_no)
1218
1219 def compare_indexes(self, index1, index2):
1220 '''
1221 Compare iterator indexes and return a tuple in the following form:
1222 if index1 < index2, returns (index1, None)
1223 if index1 == index2 returns (index1, index2)
1224 else: returns (None, index2)
1225 '''
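# Worked examples (shorter paths sort first, then lexicographically):
#
#   compare_indexes('a/b', 'a/c')   -> ('a/b', None)
#   compare_indexes('a/b', 'a/b')   -> ('a/b', 'a/b')
#   compare_indexes('a/b/c', 'a/d') -> (None, 'a/d')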
1226 l1 = index1.split('/')
1227 l2 = index2.split('/')
1228 length = len(l2) - len(l1)
1229
1230 if length > 0:
1231 return (index1, None)
1232 elif length < 0:
1233 return (None, index2)
1234
1235 for i1, i2 in zip(l1, l2):
1236 if i1 < i2:
1237 return (index1, None)
1238 elif i1 > i2:
1239 return (None, index2)
1240
1241 return (index1, index2)
0708a374 1242
8c65a2b1 1243 def list_backup(self, backup_tar_path, list_func=None):
be60ffd0 1244 if not isinstance(backup_tar_path, str):
8c65a2b1
ERE
1245 raise Exception('Backup tar path must be a string')
1246
1247 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1248 raise Exception('Source path "%s" does not exist or is not a '\
1249 'file' % backup_tar_path)
1250
1251 if not os.access(backup_tar_path, os.R_OK):
1252 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1253
1254 cwd = os.getcwd()
1255
b7c47f38 1256 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
8c65a2b1
ERE
1257 '''
1258 Handles the new volumes
1259 '''
1260 volume_name = deltarobj.volume_name_func(backup_path, True,
1261 volume_number, guess_name=True)
1262 volume_path = os.path.join(backup_path, volume_name)
1263
1264 # we convert relative paths into absolute because CWD is changed
1265 if not os.path.isabs(volume_path):
1266 volume_path = os.path.join(cwd, volume_path)
b7c47f38
PG
1267 tarobj.open_volume(volume_path, encryption=encryption)
1268
774ca538
PG
1269 if self.decryptor is None:
1270 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
8c65a2b1
ERE
1271
1272 backup_path = os.path.dirname(backup_tar_path)
1273 if not os.path.isabs(backup_path):
1274 backup_path = os.path.join(cwd, backup_path)
133d30da 1275 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)
b7a6566b 1276
8c65a2b1
ERE
1277 tarobj = tarfile.TarFile.open(backup_tar_path,
1278 mode='r' + self.mode,
1279 format=tarfile.GNU_FORMAT,
d1c38f40 1280 concat='#' in self.mode,
133d30da 1281 encryption=self.decryptor,
ea625b04 1282 new_volume_handler=new_volume_handler,
e2b59b34
ERE
1283 save_to_members=False,
1284 dereference=True)
8c65a2b1
ERE
1285
1286 def filter(cls, list_func, tarinfo):
1287 if list_func is None:
b008f989 1288 self.logger.info(tarinfo.path)
8c65a2b1
ERE
1289 else:
1290 list_func(tarinfo)
1291 return False
1292 filter = partial(filter, self, list_func)
1293
1294 tarobj.extractall(filter=filter)
1295 tarobj.close()
1296
0708a374 1297 def restore_backup(self, target_path, backup_indexes_paths=[],
e93f83f1
PG
1298 backup_tar_path=None, restore_callback=None,
1299 disaster=False):
0708a374
ERE
1300 '''
1301 Restores a backup.
1302
1303 Parameters:
0708a374
ERE
1304 - target_path: path to restore.
1305 - backup_indexes_paths: path to backup indexes, in descending date order.
1306 The indexes indicate the location of their respective backup volumes,
1307 and multiple indexes are needed to be able to restore diff backups.
1308 Note that this is an optional parameter: if not supplied, it will
1309 try to restore directly from backup_tar_path.
1310 - backup_tar_path: path to the backup tar file. Used as an alternative
1311 to backup_indexes_paths to restore directly from a tar file without
1312 using any file index. If it's a multivol tarfile, volume_name_func
1313 will be called.
4da27cfe 1314 - restore_callback: callback function to be called during restore.
b0aef801 1315 This is passed to the helper and gets called for every file.
11684b1d 1316
3a7e1a50 1317 NOTE: If you want to use an index to restore a backup, this function
11684b1d
ERE
1318 only supports doing so when the tarfile mode is either uncompressed or
1319 uses concat compression mode, because otherwise it would be very slow.
3a7e1a50
ERE
1320
1321 NOTE: Indices are assumed to follow the same format as the index_mode
1322 specified in the constructor.
e93f83f1
PG
1323
1324 Returns the list of files that could not be restored, if there were
1325 any.
0708a374 1326 '''
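# A minimal call sketch with hypothetical index names; indexes are ordered
# newest first and end with the full backup's index:
#
#   dtar.restore_backup('/srv/restore',
#       backup_indexes_paths=['/srv/backups/diff1/bdiff-2014-02-15-1030.index.gz',
#                             '/srv/backups/full/bfull-2014-02-14-1030.index.gz'])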
11684b1d 1327 # check/sanitize input
be60ffd0 1328 if not isinstance(target_path, str):
e5c6ca04
ERE
1329 raise Exception('Target path must be a string')
1330
11684b1d
ERE
1331 if backup_tar_path is None and backup_indexes_paths == []:
1332 raise Exception("You have to either provide index paths or a tar path")
e5c6ca04 1333
ea6d3c3e
ERE
1334 if len(backup_indexes_paths) == 0:
1335 mode = "tar"
1336 else:
1337 mode = "diff"
1338
1339 if mode == "tar":
be60ffd0 1340 if not isinstance(backup_tar_path, str):
11684b1d
ERE
1341 raise Exception('Backup tar path must be a string')
1342
1343 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1344 raise Exception('Source path "%s" does not exist or is not a '\
1345 'file' % backup_tar_path)
1346
1347 if not os.access(backup_tar_path, os.R_OK):
1348 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1349 else:
1350 if not isinstance(backup_indexes_paths, list):
1351 raise Exception('backup_indexes_paths must be a list')
1352
1353 if self.mode.startswith(':') or self.mode.startswith('|'):
1354 raise Exception('Restore only supports either uncompressed tars'
1355 ' or concat compression when restoring from an index, and '
1356 ' the open mode you provided is "%s"' % self.mode)
1357
1358 for index in backup_indexes_paths:
be60ffd0 1359 if not isinstance(index, str):
11684b1d 1360 raise Exception('indices must be strings')
e5c6ca04 1361
11684b1d
ERE
1362 if not os.path.exists(index) or not os.path.isfile(index):
1363 raise Exception('Index path "%s" does not exist or is not a '\
1364 'file' % index)
1365
1366 if not os.access(index, os.R_OK):
1367 raise Exception('Index path "%s" is not readable' % index)
e5c6ca04
ERE
1368
1369 # try to create backup path if needed
1370 if not os.path.exists(target_path):
1371 os.makedirs(target_path)
1372
ec57ce53
ERE
1373 # make backup_tar_path absolute so that iterate_tar_path works fine
1374 if backup_tar_path and not os.path.isabs(backup_tar_path):
1375 backup_tar_path = os.path.abspath(backup_tar_path)
1376
d5361dac 1377 cwd = os.getcwd()
ec57ce53 1378 os.chdir(target_path)
d5361dac 1379
2ae46844 1380 # setup for decrypting payload
774ca538
PG
1381 if self.decryptor is None:
1382 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
2ae46844 1383
ea6d3c3e 1384 if mode == 'tar':
24ddf0a2
ERE
1385 index_it = self.iterate_tar_path(backup_tar_path)
1386 helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
ec57ce53 1387 tarobj=index_it.tar_obj)
ea6d3c3e 1388 elif mode == "diff":
044585c6 1389 helper = RestoreHelper(self, cwd, backup_indexes_paths, disaster=disaster)
f3d10816
PG
1390 try:
1391 # get iterator from newest index at _data[0]
1392 index1 = helper._data[0]["path"]
1393 index_it = self.iterate_index_path(index1)
1394 except tarfile.DecryptionError as exn:
1395 self.logger.error("failed to decrypt file [%s]: %s; is this an "
1396 "actual index file?"
1397 % (str(exn), index1))
1398 return [(index1, exn)]
d07c8065 1399
24ddf0a2
ERE
1400 dir_it = self._recursive_walk_dir('.')
1401 dir_path_it = self.jsonize_path_iterator(dir_it)
11684b1d 1402
e93f83f1
PG
1403 failed = [] # irrecoverable files
1404
a395759e 1405 # for each file to be restored, do:
24ddf0a2
ERE
1406 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
1407 if not ipath:
1408 upath = dpath['path']
1409 op_type = dpath['type']
1410 else:
1411 upath = self.unprefixed(ipath['path'])
1412 op_type = ipath['type']
42c04ead 1413
24ddf0a2 1414 # filter paths
75059f3c 1415 if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
24ddf0a2 1416 continue
ea6d3c3e 1417
24ddf0a2
ERE
1418 # if types of the file mismatch, the file needs to be deleted
1419 # and re-restored
1420 if ipath is not None and dpath is not None and\
1421 dpath['type'] != ipath['type']:
1422 helper.delete(upath)
1423
1424 # if file not found in dpath, we can directly restore from index
1425 if not dpath:
1426 # if the file doesn't exist and it needs to be deleted, it
1427 # means that work is already done
1428 if ipath['path'].startswith('delete://'):
ea6d3c3e 1429 continue
24ddf0a2 1430 try:
b008f989 1431 self.logger.debug("restore %s" % ipath['path'])
4da27cfe 1432 helper.restore(ipath, l_no, restore_callback)
be60ffd0 1433 except Exception as e:
e93f83f1 1434 iipath = ipath.get ("path", "")
7b07645e 1435 self.logger.error("FAILED to restore: {} ({})"
e93f83f1
PG
1436 .format(iipath, e))
1437 if disaster is True:
1438 failed.append ((iipath, e))
24ddf0a2 1439 continue
11684b1d 1440
24ddf0a2
ERE
1441 # if both files are equal, we have nothing to restore
1442 if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
1443 continue
1444
1445 # we have to restore the file, but first we need to delete the
1446 # current existing file.
1447 # we don't delete the file if it's a directory, because it might
1448 # just have changed mtime, so it's quite inefficient to remove
1449 # it
1450 if ipath:
1451 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
42c04ead 1452 helper.delete(upath)
b008f989 1453 self.logger.debug("restore %s" % ipath['path'])
e93f83f1
PG
1454 try:
1455 helper.restore(ipath, l_no, restore_callback)
1456 except Exception as e:
1457 if disaster is False:
1458 raise
1459 failed.append ((ipath.get ("path", ""), e))
1460 continue
24ddf0a2
ERE
1461
1462 # if the file is not in the index (so it comes from the target
1463 # directory) then we have to delete it
1464 else:
c9d47a03 1465 self.logger.debug("delete %s" % upath)
24ddf0a2 1466 helper.delete(upath)
42c04ead 1467
ec57ce53
ERE
1468 helper.restore_directories_permissions()
1469 index_it.release()
1470 os.chdir(cwd)
1471 helper.cleanup()
ea6d3c3e 1472
e93f83f1
PG
1473 return failed
1474
1475
1476 def recover_backup(self, target_path, backup_indexes_paths=[],
1477 restore_callback=None):
1478 """
1479 Walk the index, extracting objects in disaster mode. Bad files are
1480 reported along with a reason.
1481 """
1482 return self.restore_backup(target_path,
1483 backup_indexes_paths=backup_indexes_paths,
1484 disaster=True)
1485
1486
11684b1d
ERE
1487 def _parse_json_line(self, f, l_no):
1488 '''
ee0e095f 1489 Read line from file like object and process it as JSON.
11684b1d
ERE
1490 '''
1491 l = f.readline()
1492 l_no += 1
1493 try:
be60ffd0 1494 j = json.loads(l.decode('UTF-8'))
ee0e095f
PG
1495 except UnicodeDecodeError as e:
1496 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1497 raise Exception \
1498 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1499 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1500 from e
1501 raise Exception \
1502 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1503 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1504 from e
be60ffd0 1505 except ValueError as e:
11684b1d
ERE
1506 raise Exception("error parsing this json line "
1507 "(line number %d): %s" % (l_no, l))
1508 return j, l_no
ea6d3c3e 1509
24ddf0a2 1510
ea6d3c3e
ERE
1511class RestoreHelper(object):
1512 '''
1513 Class used to help restore files from indices
1514 '''
1515
1516 # holds the dicts of data
1517 _data = []
1518
1519 _deltatar = None
1520
1521 _cwd = None
1522
0501fe0a
ERE
1523 # list of directories to be restored. This is done as a last step, see
1524 # tarfile.extractall for details.
1525 _directories = []
1526
e93f83f1
PG
1527 _disaster = False
1528
037994ca 1529 def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
e93f83f1 1530 tarobj=None, disaster=False):
ea6d3c3e
ERE
1531 '''
1532 Constructor opens the tars and initializes the data structures.
1533
037994ca
PG
1534 Assumptions:
1535
1536 - Index list must be provided in reverse order (newer first).
1537 - “newer first” apparently means that if there are n backups
1538 provided, the last full backup is at index n-1 and the most recent
1539 diff backup is at index 0.
1540 - Only the first, the second, and the last elements of
1541 ``index_list`` are relevant, others will not be accessed.
1542 - If no ``index_list`` is provided, both ``tarobj`` and
1543 ``backup_path`` must be passed.
1544 - If ``index_list`` is provided, the values of ``tarobj`` and
1545 ``backup_path`` are ignored.
ea6d3c3e
ERE
1546 '''
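 # For example (paths invented for illustration), with one diff backup on
 # top of one full backup, the newer index comes first:
 #
 #     index_list = ["backup-diff/index.gz",   # newest diff -> self._data[0]
 #                   "backup-full/index.gz"]   # full backup -> self._data[-1]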
1547 self._data = []
0501fe0a 1548 self._directories = []
ea6d3c3e
ERE
1549 self._deltatar = deltatar
1550 self._cwd = cwd
3031b7ae 1551 self._password = deltatar.password
1f3fd7b0 1552 self._crypto_key = deltatar.crypto_key
3031b7ae 1553 self._decryptors = []
e93f83f1 1554 self._disaster = disaster
ea6d3c3e 1555
253d4cdd
ERE
1556 try:
1557 import grp, pwd
1558 except ImportError:
1559 grp = pwd = None
1560
1561 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1562 self.canchown = True
1563 else:
1564 self.canchown = False
1565
037994ca 1566 if index_list is not None:
24ddf0a2 1567 for index in index_list:
037994ca 1568 is_full = index == index_list[-1]
24ddf0a2 1569
d5e1d60f 1570 decryptor = None
3031b7ae 1571 if self._password is not None:
1f3fd7b0
PG
1572 decryptor = crypto.Decrypt (password=self._password,
1573 key=self._crypto_key)
d5e1d60f 1574
24ddf0a2
ERE
1575 # make paths absolute to avoid cwd problems
1576 if not os.path.isabs(index):
1577 index = os.path.normpath(os.path.join(cwd, index))
1578
1579 s = dict(
1580 curr_vol_no = None,
1581 vol_fd = None,
1582 offset = -1,
1583 tarobj = None,
1584 path = index,
1585 is_full = is_full,
1586 iterator = None,
1587 last_itelement = None,
1588 last_lno = 0,
1589 new_volume_handler = partial(self.new_volume_handler,
1590 self._deltatar, self._cwd, is_full,
d5e1d60f
PG
1591 os.path.dirname(index), decryptor),
1592 decryptor = decryptor
24ddf0a2
ERE
1593 )
1594 self._data.append(s)
1595 else:
ea6d3c3e 1596 # make paths absolute to avoid cwd problems
24ddf0a2
ERE
1597 if not os.path.isabs(backup_path):
1598 backup_path = os.path.normpath(os.path.join(cwd, backup_path))
ea6d3c3e 1599
ec57ce53
ERE
1600 # update the new_volume_handler of tar_obj
1601 tarobj.new_volume_handler = partial(self.new_volume_handler,
b7c47f38 1602 self._deltatar, self._cwd, True, os.path.dirname(backup_path),
133d30da 1603 self._deltatar.decryptor)
ea6d3c3e
ERE
1604 s = dict(
1605 curr_vol_no = None,
1606 vol_fd = None,
1607 offset = -1,
24ddf0a2
ERE
1608 tarobj = tarobj,
1609 path = backup_path,
1610 is_full = True,
670f9934
ERE
1611 iterator = None,
1612 last_itelement = None,
1613 last_lno = 0,
d5e1d60f
PG
1614 new_volume_handler = tarobj.new_volume_handler,
1615 decryptor = self._deltatar.decryptor
ea6d3c3e
ERE
1616 )
1617 self._data.append(s)
1618
3031b7ae 1619
ea6d3c3e
ERE
1620 def cleanup(self):
1621 '''
1622 Closes all open files
1623 '''
1624 for data in self._data:
55b2ffd0
ERE
1625 if data['vol_fd']:
1626 data['vol_fd'].close()
1627 data['vol_fd'] = None
ea6d3c3e
ERE
1628 if data['tarobj']:
1629 data['tarobj'].close()
1630 data['tarobj'] = None
ea6d3c3e
ERE
1631
1632 def delete(self, path):
1633 '''
1634 Delete a file
1635 '''
df99a044
ERE
1636 if not os.path.exists(path):
1637 return
1638
24ddf0a2 1639 # to preserve parent directory mtime, we save it
283fbd5e 1640 parent_dir = os.path.dirname(path) or os.getcwd()
24ddf0a2
ERE
1641 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1642
561bc39f 1643 if os.path.isdir(path) and not os.path.islink(path):
ea6d3c3e
ERE
1644 shutil.rmtree(path)
1645 else:
1646 os.unlink(path)
1647
24ddf0a2
ERE
1648 # now we restore parent_directory mtime
1649 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1650
4da27cfe 1651 def restore(self, itpath, l_no, callback=None):
ea6d3c3e 1652 '''
8a54d5dd 1653 Restore the path from the appropriate backup. Receives the current path
e8d95fe5 1654 from the newest (=first) index iterator. itpath must not be None.
b0aef801 1655 callback is a custom function that gets called for every file.
037994ca
PG
1656
1657 NB: This function takes the attribute ``_data`` as input but will only
1658 ever use its first and, if available, second element. Anything else in
1659 ``._data[]`` will be ignored.
ea6d3c3e 1660 '''
ea6d3c3e
ERE
1661 path = itpath['path']
1662
4da27cfe
SA
1663 # Calls the callback function
1664 if callback:
1665 callback()
1666
ea6d3c3e 1667 if path.startswith('delete://'):
df86af81
ERE
 1668 # the file has already been deleted by restore_backup in all cases,
 1669 # so there is nothing left to do here
ea6d3c3e 1670 return
df86af81 1671
e8d95fe5 1672 # get data from newest index (_data[0])
df86af81
ERE
1673 data = self._data[0]
1674 upath = self._deltatar.unprefixed(path)
1675
24ddf0a2 1676 # to preserve parent directory mtime, we save it
283fbd5e 1677 parent_dir = os.path.dirname(upath) or os.getcwd()
ec57ce53
ERE
1678 if not os.path.exists(parent_dir):
1679 os.makedirs(parent_dir)
24ddf0a2
ERE
1680 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1681
e8d95fe5 1682 # if the path appears in the newest index as a snapshot entry, restore
df86af81
ERE
 1683 # it here and finish
1684 if path.startswith('snapshot://'):
e93f83f1
PG
1685 try:
1686 self.restore_file(itpath, data, path, l_no, upath)
1687 except Exception:
1688 raise
24ddf0a2
ERE
1689
1690 # now we restore parent_directory mtime
1691 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
ea6d3c3e
ERE
1692 return
1693
1694 # we go from index to index, finding the path in the index, then finding
1695 # the index with the most recent snapshot of the file being restored
e8d95fe5
TJ
1696 #
1697 # Right now we support diff backups, only. No incremental backups.
1698 # As a result _data[0] is always the diff backup index
1699 # and _data[1] the full backup index.
527670c4 1700 if len(self._data) == 2:
7273719c 1701 data = self._data[1]
527670c4
TJ
1702 d, l_no, dpath = self.find_path_in_index(data, upath)
1703 if not d:
1704 self._deltatar.logger.warning('Error restoring file %s from '
1705 'index, not found in index %s' % (path, data['path']))
1706 return
1707
1708 cur_path = d.get('path', '')
1709 if cur_path.startswith('delete://'):
 1710 self._deltatar.logger.warning(('Unexpected index state: file '
 1711 '%s is listed in the first index but marked as deleted in '
 1712 'another one. The path was ignored and left untouched.') % path)
1713 return
1714 elif cur_path.startswith('snapshot://'):
1715 # this code path is reached when the file is unchanged
1716 # in the newest index and therefore of type 'list://'
1717 self.restore_file(d, data, path, l_no, dpath)
1718
1719 # now we restore parent_directory mtime
1720 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1721 return
1722
1723 # error code path is reached when:
1724 # a) we have more than two indexes (unsupported atm)
1725 # b) both indexes contain a list:// entry (logic error)
1726 # c) we have just one index and it also contains list://
4bda6f45 1727 self._deltatar.logger.warning(('Error restoring file %s from index, '
ea6d3c3e
ERE
1728 'snapshot not found in any index') % path)
1729
670f9934
ERE
1730 def find_path_in_index(self, data, upath):
 1731 # NOTE: we restart the iterator sometimes because the iterator can be
 1732 # walked over completely multiple times, for example if one path is not
 1733 # found in one index and we have to go to the next index.
7273719c
PG
1734 it = data['iterator']
1735 if it is None:
670f9934 1736 it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
be60ffd0 1737 d, l_no = it.__next__()
670f9934 1738 else:
670f9934
ERE
1739 d = data['last_itelement']
1740 l_no = data['last_lno']
1741
670f9934 1742 while True:
7273719c 1743 dpath = self._deltatar.unprefixed(d.get('path', ''))
670f9934
ERE
1744 if upath == dpath:
1745 data['last_itelement'] = d
1746 data['last_lno'] = l_no
1747 return d, l_no, dpath
1748
1749 up, dp = self._deltatar.compare_indexes(upath, dpath)
1750 # any time upath should have appeared before current dpath, it means
1751 # upath is just not in this index and we should stop
1752 if dp is None:
1753 data['last_itelement'] = d
1754 data['last_lno'] = l_no
1755 return None, 0, ''
1756
1757 try:
be60ffd0 1758 d, l_no = it.__next__()
670f9934
ERE
1759 except StopIteration:
1760 data['last_itelement'] = d
1761 data['last_lno'] = l_no
1762 return None, 0, ''
670f9934 1763
0501fe0a
ERE
1764 def restore_directories_permissions(self):
1765 '''
 1766 Restore directory permissions once everything else has been restored
1767 '''
42c04ead
ERE
1768 try:
1769 import grp, pwd
1770 except ImportError:
1771 grp = pwd = None
1772
0501fe0a
ERE
1773 self._directories.sort(key=operator.attrgetter('name'))
1774 self._directories.reverse()
0501fe0a
ERE
1775
1776 # Set correct owner, mtime and filemode on directories.
1777 for member in self._directories:
1778 dirpath = member.name
1779 try:
42c04ead
ERE
1780 os.chmod(dirpath, member.mode)
1781 os.utime(dirpath, (member.mtime, member.mtime))
253d4cdd 1782 if self.canchown:
42c04ead
ERE
1783 # We have to be root to do so.
1784 try:
1785 g = grp.getgrnam(member.gname)[2]
1786 except KeyError:
1787 g = member.gid
1788 try:
1789 u = pwd.getpwnam(member.uname)[2]
1790 except KeyError:
1791 u = member.uid
1792 try:
4e433e00 1793 if member.issym and hasattr(os, "lchown"):
42c04ead
ERE
1794 os.lchown(dirpath, u, g)
1795 else:
1796 os.chown(dirpath, u, g)
1797 except EnvironmentError:
1798 raise tarfile.ExtractError("could not change owner")
1799
be60ffd0 1800 except tarfile.ExtractError as e:
4bda6f45 1801 self._deltatar.logger.warning('tarfile: %s' % e)
0501fe0a 1802
df86af81 1803 @staticmethod
b7c47f38 1804 def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
ea6d3c3e
ERE
1805 '''
 1806 Handles opening of new volumes during restore
1807 '''
df86af81
ERE
1808 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1809 volume_number, guess_name=True)
ea6d3c3e
ERE
1810 volume_path = os.path.join(backup_path, volume_name)
1811
1812 # we convert relative paths into absolute because CWD is changed
1813 if not os.path.isabs(volume_path):
1814 volume_path = os.path.join(cwd, volume_path)
b7c47f38 1815 tarobj.open_volume(volume_path, encryption=encryption)
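 # This handler is not called directly from within this module; as shown in
 # RestoreHelper.__init__ above, it is bound with functools.partial so that
 # only the volume-specific arguments are left for the bundled tarfile to
 # supply when it reaches a volume boundary, roughly:
 #
 #     handler = partial(RestoreHelper.new_volume_handler,
 #                       deltatar, cwd, is_full,
 #                       os.path.dirname(index), decryptor)
 #     # later invoked as handler(tarobj, base_name, volume_number)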
ea6d3c3e 1816
253d4cdd 1817 def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
ea6d3c3e
ERE
1818 '''
1819 Restores a snapshot of a file from a specific backup
1820 '''
ea6d3c3e 1821 op_type = file_data.get('type', -1)
24ddf0a2 1822 member = file_data.get('member', None)
9f9ae874 1823 ismember = bool(member)
24ddf0a2
ERE
1824
1825 # when member is set, then we can assume everything is right and we
1826 # just have to restore the path
a2a37de7 1827 if member is None:
24ddf0a2
ERE
1828 vol_no = file_data.get('volume', -1)
1829 # sanity check
1830 if not isinstance(vol_no, int) or vol_no < 0:
4bda6f45 1831 self._deltatar.logger.warning('unrecognized type to be restored: '
24ddf0a2
ERE
1832 '%s, line %d' % (op_type, l_no))
1833
1834 # setup the volume that needs to be read. only needed when member is
1835 # not set
a2a37de7 1836 if index_data['curr_vol_no'] != vol_no:
24ddf0a2
ERE
1837 index_data['curr_vol_no'] = vol_no
1838 backup_path = os.path.dirname(index_data['path'])
1839 vol_name = self._deltatar.volume_name_func(backup_path,
1840 index_data['is_full'], vol_no, guess_name=True)
1841 vol_path = os.path.join(backup_path, vol_name)
1842 if index_data['vol_fd']:
1843 index_data['vol_fd'].close()
be60ffd0 1844 index_data['vol_fd'] = open(vol_path, 'rb')
24ddf0a2
ERE
1845
1846 # force reopen of the tarobj because of new volume
1847 if index_data['tarobj']:
1848 index_data['tarobj'].close()
1849 index_data['tarobj'] = None
1850
1851 # seek tarfile if needed
1852 offset = file_data.get('offset', -1)
ea6d3c3e 1853 if index_data['tarobj']:
c6226e2a
PG
1854 try:
1855 member = index_data['tarobj'].__iter__().__next__()
e93f83f1
PG
1856 except tarfile.DecryptionError:
1857 pass
1858 except tarfile.CompressionError:
1859 pass
1860
24ddf0a2
ERE
1861 if not member or member.path != file_data['path']:
1862 # force a seek and reopen
1863 index_data['tarobj'].close()
1864 index_data['tarobj'] = None
1865
1866 # open the tarfile if needed
1867 if not index_data['tarobj']:
1868 index_data['vol_fd'].seek(offset)
1869 index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
1870 fileobj=index_data['vol_fd'],
1871 format=tarfile.GNU_FORMAT,
d1c38f40 1872 concat='#' in self._deltatar.mode,
d5e1d60f 1873 encryption=index_data["decryptor"],
253d4cdd 1874 new_volume_handler=index_data['new_volume_handler'],
044585c6
PG
1875 save_to_members=False,
1876 tolerant=self._disaster)
24ddf0a2 1877
be60ffd0 1878 member = index_data['tarobj'].__iter__().__next__()
ea6d3c3e 1879
253d4cdd
ERE
1880 member.path = unprefixed_path
1881 member.name = unprefixed_path
0501fe0a
ERE
1882
1883 if op_type == 'directory':
253d4cdd 1884 self.add_member_dir(member)
0501fe0a 1885 member = copy.copy(member)
be60ffd0 1886 member.mode = 0o0700
0501fe0a 1887
df86af81
ERE
1888 # if it's an existing directory, we then don't need to recreate it
1889 # just set the right permissions, mtime and that kind of stuff
1890 if os.path.exists(member.path):
1891 return
1892
9f9ae874 1893 if not ismember:
24ddf0a2
ERE
1894 # set current volume number in tarobj, otherwise the extraction of the
1895 # file might fail when trying to extract a multivolume member
1896 index_data['tarobj'].volume_number = index_data['curr_vol_no']
86a6e741 1897
9b13f5c4
PG
1898 def ignore_symlink (member, *_args):
1899 self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
786addd6 1900
ea6d3c3e 1901 # finally, restore the file
9b13f5c4 1902 index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)
253d4cdd
ERE
1903
1904 def add_member_dir(self, member):
1905 '''
1906 Add member dir to be restored at the end
1907 '''
4e433e00 1908 if not self.canchown:
253d4cdd
ERE
1909 self._directories.append(DirItem(name=member.name, mode=member.mode,
1910 mtime=member.mtime))
1911 else:
1912 self._directories.append(DirItem(name=member.name, mode=member.mode,
1913 mtime=member.mtime, gname=member.gname, uname=member.uname,
4e433e00 1914 uid=member.uid, gid=member.gid, issym=member.issym()))
253d4cdd
ERE
1915
1916class DirItem(object):
1917 def __init__(self, **kwargs):
be60ffd0 1918 for k, v in kwargs.items():
9f9ae874 1919 setattr(self, k, v)
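 # DirItem is a plain attribute bag; add_member_dir() above fills it from a
 # tar member, e.g. (values invented for illustration):
 #
 #     DirItem(name="home/user", mode=0o755, mtime=1419860400)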