extend tarfile API for rescue mode
[python-delta-tar] / deltatar / deltatar.py
Commit | Line | Data
6b2fa38f 1#!/usr/bin/env python3
0708a374 2
51797cd6 3# Copyright (C) 2013, 2014 Intra2net AG
0708a374
ERE
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; either version 3 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU Lesser General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see
17# <http://www.gnu.org/licenses/lgpl-3.0.html>
18
19# Author: Eduardo Robles Elvira <edulix@wadobo.com>
20
938c2d54
PG
21DELTATAR_HEADER_VERSION = 1
22DELTATAR_PARAMETER_VERSION = 1
3fdea6d4 23
0708a374
ERE
24import logging
25import datetime
6c678f3a 26import binascii
938c2d54 27import io
0501fe0a 28import operator
0708a374 29import os
0501fe0a 30import copy
82de3376 31import shutil
8a8fadda 32import re
e82f14f5
ERE
33import stat
34import json
0708a374
ERE
35from functools import partial
36
37from . import tarfile
2ae46844 38from . import crypto
0708a374 39
0708a374
ERE
40class NullHandler(logging.Handler):
41 def emit(self, record):
42 pass
24ddf0a2
ERE
43
44
0708a374
ERE
45logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
46
974408b5
ERE
47
48# match mode
49NO_MATCH = False
50MATCH = True
51PARENT_MATCH = 2
52
133d30da
PG
53# encryption direction
54CRYPTO_MODE_ENCRYPT = 0
55CRYPTO_MODE_DECRYPT = 1
56
13cc7dfc
PG
57# The canonical extension for encrypted backup files regardless of the actual
58# encryption parameters is “.pdtcrypt”. This is analogous to the encryption
59# header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
60# Since the introduction of the versioned header there is no longer any need
61# for encoding encryption parameters in the file extensions (“.aes128” and
62# suchlike).
63PDTCRYPT_EXTENSION = "pdtcrypt"
2cdd9faf
PG
64PDT_TYPE_ARCHIVE = 0
65PDT_TYPE_AUX = 1
13cc7dfc 66
9eccb1c2
PG
67AUXILIARY_FILE_INDEX = 0
68AUXILIARY_FILE_INFO = 1
69
0708a374
ERE
70class DeltaTar(object):
71 '''
72 Backup class used to create backups
73 '''
74
75 # list of files to exclude in the backup creation or restore operation. It
76 # can contain python regular expressions.
77 excluded_files = []
78
79 # list of files to include in the backup creation or restore operation. It
80 # can contain python regular expressions. If empty, all files in the source
81 # path will be backed up (when creating a backup) or all the files in the
a83fa4ed 82 # backup will be restored (when restoring a backup), but if included_files
0708a374
ERE
83 # is set then only the files included in the list will be processed.
84 included_files = []
85
86 # custom filter of files to be backed up (or restored). Unused and unset
87 # by default. The function receives a file path and must return a boolean.
88 filter_func = None
89
da26094a
ERE
90 # mode in which the delta will be created (when creating a backup) or
91 # opened (when restoring). Accepts the same modes as the tarfile library.
92 mode = ""
0708a374
ERE
93
94 # used together with aes modes to encrypt and decrypt backups.
95 password = None
1f3fd7b0
PG
96 crypto_key = None
97 nacl = None
0708a374 98
dbee011c
PG
99 # parameter version to use when encrypting; note that this has no effect
100 # on decryption since the required settings are determined from the headers
54f909ca 101 crypto_version = DELTATAR_HEADER_VERSION
dbee011c
PG
102 crypto_paramversion = None
103
133d30da 104 # when encrypting or decrypting, these hold crypto handlers; created before
2ae46844 105 # establishing the Tarfile stream iff a password is supplied.
133d30da
PG
106 encryptor = None
107 decryptor = None
2ae46844 108
0708a374
ERE
109 # python logger object.
110 logger = None
111
3a7e1a50
ERE
112 # specifies the index mode in the same format as @param mode, but without
113 # the ':', '|' or '#' at the beginning. It doesn't make sense to specify
2ae46844 114 # that the index is encrypted if no password is given in the constructor.
3a7e1a50 115 index_mode = None
0708a374
ERE
116
117 # current time for this backup. Used for file names and file creation checks
118 current_time = None
119
9eae9a1f
ERE
120 # extra data to be included in the header of the index file when creating a
121 # backup
122 extra_data = dict()
123
0708a374
ERE
124 # valid tarfile modes and their corresponding default file extension
125 __file_extensions_dict = {
da26094a
ERE
126 '': '',
127 ':': '',
128 ':gz': '.gz',
129 ':bz2': '.bz2',
130 '|': '',
131 '|gz': '.gz',
132 '|bz2': '.bz2',
133 '#gz': '.gz',
6e99d23a
PG
134 '#gz.pdtcrypt': '.gz',
135 '#pdtcrypt': '',
d1c38f40 136 '#': '',
0708a374
ERE
137 }
138
3a7e1a50
ERE
139 # valid index modes and their corresponding default file extension
140 __index_extensions_dict = {
141 '': '',
142 'gz': '.gz',
143 'bz2': '.bz2',
6e99d23a
PG
144 'gz.pdtcrypt': '.gz',
145 'pdtcrypt': '',
3a7e1a50
ERE
146 }
147
8adbe50d
ERE
148 # valid path prefixes
149 __path_prefix_list = [
150 u'snapshot://',
151 u'list://',
152 u'delete://'
153 ]
154
0708a374 155 def __init__(self, excluded_files=[], included_files=[],
da26094a 156 filter_func=None, mode="", password=None,
1f3fd7b0 157 crypto_key=None, nacl=None,
54f909ca 158 crypto_version=DELTATAR_HEADER_VERSION,
dbee011c 159 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
3a7e1a50 160 logger=None, index_mode=None, index_name_func=None,
0708a374
ERE
161 volume_name_func=None):
162 '''
163 Constructor. Configures the diff engine.
164
165 Parameters:
166 - excluded_files: list of files to exclude in the backup creation or
167 restore operation. It can contain python regular expressions.
168
169 - included_files: list of files to include in the backup creation or
170 restore operation. It can contain python regular expressions. If
171 empty, all files in the source path will be backed up (when creating a
172 backup) or all the files in the backup will be restored (when
a83fa4ed 173 restoring a backup), but if included_files is set then only the files
0708a374
ERE
174 included in the list will be processed.
175
176 - filter_func: custom filter of files to be backed up (or restored).
177 Unused and unset by default. The function receives a file path and
178 must return a boolean.
179
180 - mode: mode in which the delta will be created (when creating a backup)
181 or opened (when restoring). Accepts the same modes as the tarfile
182 library. Valid modes are:
183
da26094a
ERE
184 '' open uncompressed
185 ':' open uncompressed
186 ':gz' open with gzip compression
187 ':bz2' open with bzip2 compression
188 '|' open an uncompressed stream of tar blocks
189 '|gz' open a gzip compressed stream of tar blocks
190 '|bz2' open a bzip2 compressed stream of tar blocks
191 '#gz' open a stream of gzip compressed tar blocks
0708a374 192
1f3fd7b0
PG
193 - crypto_key: used to encrypt and decrypt backups. Encryption will
194 be enabled automatically if a key is supplied. Requires a salt to be
195 passed as well.
196
197 - nacl: salt that was used to derive the encryption key for embedding
198 in the PDTCRYPT header. Not needed when decrypting and when
199 encrypting with password.
200
6e99d23a
PG
201 - password: used to encrypt and decrypt backups. Encryption will be
202 enabled automatically if a password is supplied.
0708a374 203
54f909ca
PG
204 - crypto_version: version of the format, determining the kind of PDT
205 object header.
206
dbee011c
PG
207 - crypto_paramversion: optionally request encryption conforming to
208 a specific parameter version. Defaults to the standard PDT value
209 which as of 2017 is the only one available.
210
0708a374
ERE
211 - logger: python logger object. Optional.
212
3a7e1a50 213 - index_mode: specifies the index mode in the same format as @param
6e99d23a
PG
214 mode, but without the ':', '|' or '#' at the beginning. If encryption
215 is requested it will extend to the auxiliary (index, info) files as
216 well. This is an optional parameter that will automatically mimic
217 @param mode by default if not provided. Valid modes are:
3a7e1a50
ERE
218
219 '' open uncompressed
220 'gz' open with gzip compression
221 'bz2' open with bzip2 compression
0708a374
ERE
222
223 - index_name_func: function that sets a custom name for the index file.
2cc6e32b
PG
224 This function receives a flag to indicate whether the name will be
225 used for a full or diff backup. The backup path will be prepended to
226 its return value.
0708a374
ERE
227
228 - volume_name_func: function that defines the name of tar volumes. It
229 receives the backup_path, whether it is a full backup, and the volume
230 number, and must return the name of the corresponding volume. Optional;
231 DeltaTar has default names for tar volumes.
232 '''
233
da26094a 234 if mode not in self.__file_extensions_dict:
8a54d5dd
PG
235 raise Exception('Unrecognized extension mode=[%s] requested for files'
236 % str(mode))
0708a374
ERE
237
238 self.excluded_files = excluded_files
239 self.included_files = included_files
240 self.filter_func = filter_func
241 self.logger = logging.getLogger('deltatar.DeltaTar')
242 if logger:
243 self.logger.addHandler(logger)
244 self.mode = mode
2ae46844 245
1f3fd7b0
PG
246 if crypto_key is not None:
247 self.crypto_key = crypto_key
248 self.nacl = nacl # encryption only
249
2ae46844
PG
250 if password is not None:
251 self.password = password
3a7e1a50 252
54f909ca
PG
253 if crypto_version is not None:
254 self.crypto_version = crypto_version
255
dbee011c
PG
256 if crypto_paramversion is not None:
257 self.crypto_paramversion = crypto_paramversion
258
3a7e1a50
ERE
259 # generate index_mode
260 if index_mode is None:
261 index_mode = ''
6e99d23a 262 if 'gz' in mode:
3a7e1a50
ERE
263 index_mode = "gz"
264 elif 'bz2' in mode:
265 index_mode = "bz2"
266 elif mode not in self.__index_extensions_dict:
8a54d5dd
PG
267 raise Exception('Unrecognized extension mode=[%s] requested for index'
268 % str(mode))
3a7e1a50
ERE
269
270 self.index_mode = index_mode
0708a374
ERE
271 self.current_time = datetime.datetime.now()
272
273 if index_name_func is not None:
274 self.index_name_func = index_name_func
275
276 if volume_name_func is not None:
277 self.volume_name_func = volume_name_func
278
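A minimal construction sketch, assuming the package is importable as deltatar and using a hypothetical passphrase. Note that plain strings in included_files/excluded_files are matched literally (exact path or directory prefix), while compiled patterns are applied as regular expressions:

    import re
    from deltatar.deltatar import DeltaTar

    # '#gz' selects the concatenated-gzip mode; with a password set, the
    # volumes and the index are additionally encrypted (".pdtcrypt").
    dtar = DeltaTar(mode='#gz',
                    password='example-passphrase',             # hypothetical
                    included_files=['documents/'],             # literal dir prefix
                    excluded_files=[re.compile(r'.*\.tmp$')])  # regex exclusion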
e54cfec5 279 def pick_extension(self, kind, mode=None):
2cdd9faf
PG
280 """
281 Choose the extension depending on a) the kind of file given, b) the
282 processing mode, and c) the current encryption settings.
283 """
284 ret = ""
285 if kind == PDT_TYPE_ARCHIVE:
286 ret += ".tar"
e54cfec5
PG
287 if mode is None:
288 mode = self.__index_extensions_dict [self.index_mode]
2cdd9faf 289 ret += mode
a83fa4ed 290 if self.crypto_key is not None or self.password is not None:
2cdd9faf
PG
291 ret += "." + PDTCRYPT_EXTENSION
292 return ret
293
f0287fb7 294 def index_name_func(self, is_full): # pylint: disable=method-hidden
0708a374 295 '''
2cc6e32b
PG
296 Callback for setting a custom name for the index file. Depending on
297 whether *is_full* is set, it will create a suitable name for a full
298 or a diff backup.
0708a374
ERE
299 '''
300 prefix = "bfull" if is_full else "bdiff"
f7940c31 301 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf
PG
302 extension = self.pick_extension \
303 (PDT_TYPE_AUX,
304 self.__index_extensions_dict [self.index_mode])
0708a374 305
da26094a 306 return "%s-%s.index%s" % (prefix, date_str, extension)
0708a374 307
f0287fb7
CH
308 def volume_name_func(self, backup_path, # pylint: disable=method-hidden
309 is_full, volume_number,
310 guess_name=False):
0708a374
ERE
311 '''
312 Function that defines the name of tar volumes. It receives the
313 backup_path, whether it is a full backup, and the volume number, and must
314 return the name of the corresponding volume. Optional; DeltaTar has default
315 names for tar volumes.
df86af81
ERE
316
317 If guess_name is activated, the file is intended not to be created but
318 to be found, and thus the date will be guessed.
0708a374
ERE
319 '''
320 prefix = "bfull" if is_full else "bdiff"
2cdd9faf
PG
321 extension = self.pick_extension \
322 (PDT_TYPE_ARCHIVE,
323 self.__file_extensions_dict [self.mode])
0708a374 324
df86af81 325 if not guess_name:
f7940c31 326 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf 327 return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
df86af81
ERE
328 else:
329 prefix = prefix + "-"
90b75470 330 postfix = "-%03d%s" % (volume_number + 1, extension)
86a6e741
ERE
331 for f in os.listdir(backup_path):
332 if f.startswith(prefix) and f.endswith(postfix):
333 return f
df86af81
ERE
334 raise Exception("volume not found")
335
0708a374 336
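For orientation, a sketch of the names the two default naming functions produce; the date component comes from current_time, so the values shown are only illustrative:

    dtar = DeltaTar(mode='#gz', password='example-passphrase')
    dtar.index_name_func(is_full=True)
    #  -> 'bfull-2014-01-23-1430.index.gz.pdtcrypt'
    dtar.volume_name_func('/mnt/backups', is_full=True, volume_number=0)
    #  -> 'bfull-2014-01-23-1430-001.tar.gz.pdtcrypt'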
974408b5 337 def filter_path(self, path, source_path="", is_dir=None):
8a8fadda
ERE
338 '''
339 Filters a path, given the source_path, using the filtering properties
340 set in the constructor.
341 The filtering order is:
342 1. included_files (if any)
343 2. excluded_files
344 3. filter_func (which must return whether the file is accepted or not)
345 '''
75059f3c 346
c1af2184 347 if len(source_path) > 0:
75059f3c
CH
348 # ensure that exactly one trailing separator is also stripped from path
349 source_path = source_path.rstrip(os.sep) + os.sep
8a8fadda
ERE
350 path = path[len(source_path):]
351
352 # 1. filter included_files
974408b5 353 match = MATCH
8a8fadda 354 if len(self.included_files) > 0:
974408b5 355 match = NO_MATCH
8a8fadda
ERE
356 for i in self.included_files:
357 # it can be either a regexp or a string
be60ffd0 358 if isinstance(i, str):
8a8fadda
ERE
359 # if the string matches, then continue
360 if i == path:
974408b5 361 match = MATCH
c1af2184 362 break
8a8fadda
ERE
363
364 # if the string ends with / it's a directory, and if the
7b07645e 365 # path is contained in it, it is included
c1af2184 366 if i.endswith('/') and path.startswith(i):
974408b5 367 match = MATCH
c1af2184 368 break
8a8fadda
ERE
369
370 # if the string doesn't end with /, add it and do the same
371 # check
c1af2184 372 elif path.startswith(i + '/'):
974408b5 373 match = MATCH
c1af2184 374 break
8a8fadda 375
974408b5
ERE
376 # check for PARENT_MATCH
377 if is_dir:
378 dir_path = path
379 if not dir_path.endswith('/'):
380 dir_path += '/'
381
382 if i.startswith(dir_path):
383 match = PARENT_MATCH
384
8a8fadda
ERE
385 # if it's a reg exp, then we just check if it matches
386 elif isinstance(i, re._pattern_type):
c1af2184 387 if i.match(path):
974408b5 388 match = MATCH
c1af2184 389 break
8a8fadda 390 else:
4bda6f45 391 self.logger.warning('Invalid pattern in included_files: %s' % str(i))
8a8fadda 392
974408b5
ERE
393 if match == NO_MATCH:
394 return NO_MATCH
c1af2184 395
974408b5
ERE
396 # when a directory is in PARENT_MATCH, it doesn't matter if it's
397 # excluded. Its subfiles will be excluded, but the directory itself
398 # won't
399 if match != PARENT_MATCH:
8a8fadda
ERE
400 for e in self.excluded_files:
401 # it can be either a regexp or a string
be60ffd0 402 if isinstance(e, str):
8a8fadda 403 # if the string matches, then exclude
c1af2184 404 if e == path:
974408b5 405 return NO_MATCH
8a8fadda
ERE
406
407 # if the string ends with / it's a directory, and if the
408 # path starts with the directory, then exclude
c1af2184 409 if e.endswith('/') and path.startswith(e):
974408b5 410 return NO_MATCH
8a8fadda
ERE
411
412 # if the string doesn't end with /, do the same check with
413 # the slash added
c1af2184 414 elif path.startswith(e + '/'):
974408b5 415 return NO_MATCH
8a8fadda
ERE
416
417 # if it's a reg exp, then we just check if it matches
c1af2184
ERE
418 elif isinstance(e, re._pattern_type):
419 if e.match(path):
974408b5 420 return NO_MATCH
8a8fadda 421 else:
4bda6f45 422 self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
8a8fadda
ERE
423
424 if self.filter_func:
425 return self.filter_func(path)
426
974408b5 427 return match
8a8fadda 428
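A short sketch of the matching rules above, reusing the module-level match constants; plain strings are compared literally, compiled patterns via match():

    import re

    dtar = DeltaTar(mode='',
                    included_files=['docs/'],
                    excluded_files=[re.compile(r'.*\.bak$')])

    dtar.filter_path('docs/manual.txt')    # MATCH        (inside an included dir)
    dtar.filter_path('docs/old.bak')       # NO_MATCH     (regex exclusion wins)
    dtar.filter_path('src/main.c')         # NO_MATCH     (not in included_files)
    dtar.filter_path('docs', is_dir=True)  # PARENT_MATCH (parent of an inclusion)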
283fbd5e 429 def _recursive_walk_dir(self, source_path, keep_base_dir=False):
0708a374
ERE
430 '''
431 Walk a directory recursively, yielding each file/directory
0708a374
ERE
432 '''
433
283fbd5e 434 source_path = source_path.rstrip(os.sep)
0708a374 435
283fbd5e 436 if keep_base_dir:
adf7dac4 437 beginning_size = 0
283fbd5e
CH
438 else:
439 beginning_size = len(source_path) + 1 # +1 for os.sep
440
441 queue = [source_path]
442
d07c8065 443 while queue:
df86af81 444 cur_path = queue.pop(0)
0708a374 445
d86735e4
ERE
446 # it might have been removed in the mean time
447 if not os.path.exists(cur_path):
448 continue
449
7dec665c
CH
450 for filename in sorted(os.listdir(cur_path)):
451 child = os.path.join(cur_path, filename)
d07c8065
ERE
452 is_dir = os.path.isdir(child)
453 status = self.filter_path(child, source_path, is_dir)
7dec665c
CH
454 if status == NO_MATCH:
455 continue
456 if not os.access(child, os.R_OK):
4bda6f45 457 self.logger.warning('Error accessing possibly locked file %s' % child)
7dec665c 458 continue
8a8fadda 459
d07c8065 460 if status == MATCH:
adf7dac4 461 yield child[beginning_size:]
0708a374 462
d07c8065
ERE
463 if is_dir and (status == MATCH or status == PARENT_MATCH):
464 queue.append(child)
0708a374 465
e82f14f5
ERE
466 def _stat_dict(self, path):
467 '''
468 Returns a dict with the stat data used to compare files
469 '''
470 stinfo = os.stat(path)
471 mode = stinfo.st_mode
472
473 ptype = None
474 if stat.S_ISDIR(mode):
d07c8065 475 ptype = u'directory'
e82f14f5 476 elif stat.S_ISREG(mode):
d07c8065 477 ptype = u'file'
e82f14f5 478 elif stat.S_ISLNK(mode):
d07c8065 479 ptype = u'link'
e82f14f5
ERE
480
481 return {
d07c8065 482 u'type': ptype,
be60ffd0 483 u'path': path,
d07c8065 484 u'mode': mode,
0501fe0a
ERE
485 u'mtime': int(stinfo.st_mtime),
486 u'ctime': int(stinfo.st_ctime),
d07c8065
ERE
487 u'uid': stinfo.st_uid,
488 u'gid': stinfo.st_gid,
489 u'inode': stinfo.st_ino,
490 u'size': stinfo.st_size
e82f14f5
ERE
491 }
492
df99a044 493 def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
d07c8065
ERE
494 '''
495 Return whether the dicts are equal in the stat keys
496 '''
fc8fdcbc 497 keys = [u'type', u'mode',u'size', u'mtime',
d041935c 498 # not restored: u'inode', u'ctime'
df99a044 499 ]
8adbe50d 500
fc8fdcbc 501 # only if user is root, then also check gid/uid. otherwise do not check it,
d041935c 502 # because tarfile can chown in case of being superuser only
50d70ca9
PG
503 #
504 # also, skip the check in rpmbuild since the sources end up with the
505 # uid:gid of the packager while the extracted files are 0:0.
506 if hasattr(os, "geteuid") and os.geteuid() == 0 \
507 and os.getenv ("RPMBUILD_OPTIONS") is None:
fc8fdcbc
ERE
508 keys.append('gid')
509 keys.append('uid')
510
ea6d3c3e 511 if (not d1 and d2 != None) or (d1 != None and not d2):
8adbe50d
ERE
512 return False
513
cbac9f0b
ERE
514 if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
515 return False
8adbe50d 516
fc8fdcbc
ERE
517 type = d1.get('type', '')
518
d07c8065 519 for key in keys:
fc8fdcbc
ERE
520 # size doesn't matter for directories
521 if type == 'directory' and key == 'size':
522 continue
d07c8065
ERE
523 if d1.get(key, -1) != d2.get(key, -2):
524 return False
525 return True
526
df99a044 527 def prefixed(self, path, listsnapshot_equal=False):
8adbe50d
ERE
528 '''
529 if a path is not prefixed, return it prefixed
530 '''
531 for prefix in self.__path_prefix_list:
532 if path.startswith(prefix):
df99a044
ERE
533 if listsnapshot_equal and prefix == u'list://':
534 return u'snapshot://' + path[len(prefix):]
8adbe50d
ERE
535 return path
536 return u'snapshot://' + path
537
538 def unprefixed(self, path):
539 '''
540 remove a path prefix if any
541 '''
542 for prefix in self.__path_prefix_list:
543 if path.startswith(prefix):
544 return path[len(prefix):]
545 return path
546
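Illustratively, reusing a dtar instance constructed as in the earlier sketch:

    dtar.prefixed('etc/passwd')               # 'snapshot://etc/passwd'
    dtar.prefixed('list://etc/passwd')        # unchanged, already prefixed
    dtar.prefixed('list://etc/passwd', True)  # 'snapshot://etc/passwd'
    dtar.unprefixed('delete://var/log/x')     # 'var/log/x'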
133d30da
PG
547
548 def initialize_encryption (self, mode):
549 password = self.password
1f3fd7b0
PG
550 key = self.crypto_key
551 nacl = self.nacl
133d30da 552
1f3fd7b0 553 if key is None and password is None:
133d30da
PG
554 return
555 if mode == CRYPTO_MODE_ENCRYPT:
1f3fd7b0
PG
556 return crypto.Encrypt (password=password,
557 key=key,
558 nacl=nacl,
54f909ca 559 version=self.crypto_version,
774ca538 560 paramversion=self.crypto_paramversion)
133d30da 561 if mode == CRYPTO_MODE_DECRYPT:
1f3fd7b0 562 return crypto.Decrypt (password=password, key=key)
133d30da
PG
563
564 raise Exception ("invalid encryption mode [%r]" % mode)
565
566
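A sketch of how these handlers are obtained; the same objects are later passed to tarfile.TarFile.open() as the encryption argument:

    from deltatar.deltatar import (DeltaTar, CRYPTO_MODE_ENCRYPT,
                                   CRYPTO_MODE_DECRYPT)

    dtar = DeltaTar(mode='#gz', password='example-passphrase')
    enc = dtar.initialize_encryption(CRYPTO_MODE_ENCRYPT)  # crypto.Encrypt
    dec = dtar.initialize_encryption(CRYPTO_MODE_DECRYPT)  # crypto.Decrypt

    # With neither a password nor a crypto_key configured, the method
    # returns None and the archive is written unencrypted.
    assert DeltaTar(mode='#gz').initialize_encryption(CRYPTO_MODE_ENCRYPT) is None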
9eccb1c2 567 def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
3a7e1a50 568 '''
9eccb1c2
PG
569 Given the specified configuration, opens a file for reading or writing,
570 inheriting the encryption and compression settings from the backup.
571 Returns a file object ready to use.
3fdea6d4 572
c8c72fe1
PG
573 :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
574 respectively).
575 :type mode: str
774ca538
PG
576 :param kind: Role of the file, see AUXILIARY_FILE_* constants.
577 Both the info and the auxiliary file have a globally
578 unique, constant counter value.
3fdea6d4 579 :type kind: int
3a7e1a50 580 '''
3a7e1a50
ERE
581 if self.index_mode.startswith('gz'):
582 comptype = 'gz'
583 elif self.index_mode.startswith('bz2'):
584 comptype = 'bz2'
585 else:
586 comptype = 'tar'
587
133d30da 588 crypto_ctx = None
6de9444a 589 enccounter = None
133d30da 590 if mode == "w":
774ca538 591 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 592 elif mode == "r":
774ca538 593 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
133d30da 594
3031b7ae
PG
595 if crypto_ctx is not None:
596 if kind == AUXILIARY_FILE_INFO:
597 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
598 elif kind == AUXILIARY_FILE_INDEX:
599 enccounter = crypto.AES_GCM_IV_CNT_INDEX
600 else:
601 raise Exception ("invalid kind of aux file %r" % kind)
602
c8c72fe1 603 sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
3fdea6d4 604 bufsize=tarfile.RECORDSIZE, fileobj=None,
6de9444a 605 encryption=crypto_ctx, enccounter=enccounter)
c8c72fe1
PG
606
607 return sink
608
3a7e1a50 609
0708a374 610 def create_full_backup(self, source_path, backup_path,
d4a05db6 611 max_volume_size=None, extra_data=dict()):
0708a374
ERE
612 '''
613 Creates a full backup.
614
615 Parameters:
616 - source_path: source path to the directory to back up.
617 - backup_path: path where the backup will be stored. Backup path will
618 be created if not existent.
d5361dac
ERE
619 - max_volume_size: maximum volume size in megabytes. Used to split the
620 backup in volumes. Optional (won't split in volumes by default).
9eae9a1f
ERE
621 - extra_data: a json-serializable dictionary with information that you
622 want to be included in the header of the index file
0708a374
ERE
623 '''
624 # check input
be60ffd0 625 if not isinstance(source_path, str):
0708a374
ERE
626 raise Exception('Source path must be a string')
627
be60ffd0 628 if not isinstance(backup_path, str):
0708a374
ERE
629 raise Exception('Backup path must be a string')
630
631 if not os.path.exists(source_path) or not os.path.isdir(source_path):
632 raise Exception('Source path "%s" does not exist or is not a '\
633 'directory' % source_path)
634
d07c8065
ERE
635 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
636 max_volume_size < 1):
637 raise Exception('max_volume_size must be a positive integer')
d5361dac
ERE
638 if max_volume_size != None:
639 max_volume_size = max_volume_size*1024*1024
640
9eae9a1f
ERE
641 if not isinstance(extra_data, dict):
642 raise Exception('extra_data must be a dictionary')
643
644 try:
645 extra_data_str = json.dumps(extra_data)
646 except:
647 raise Exception('extra_data is not json-serializable')
648
0708a374
ERE
649 if not os.access(source_path, os.R_OK):
650 raise Exception('Source path "%s" is not readable' % source_path)
651
652 # try to create backup path if needed
653 if not os.path.exists(backup_path):
d4a05db6 654 os.makedirs(backup_path)
0708a374
ERE
655
656 if not os.access(backup_path, os.W_OK):
657 raise Exception('Backup path "%s" is not writeable' % backup_path)
658
659 if source_path.endswith('/'):
660 source_path = source_path[:-1]
661
662 if backup_path.endswith('/'):
663 backup_path = backup_path[:-1]
664
665 # update current time
666 self.current_time = datetime.datetime.now()
667
668 if self.mode not in self.__file_extensions_dict:
669 raise Exception('Unrecognized extension')
670
2ae46844 671 # setup for encrypting payload
774ca538
PG
672 if self.encryptor is None:
673 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
2ae46844 674
0708a374 675 # some initialization
11684b1d 676 self.vol_no = 0
0708a374
ERE
677
678 # generate the first volume name
679 vol_name = self.volume_name_func(backup_path, True, 0)
680 tarfile_path = os.path.join(backup_path, vol_name)
681
774ca538
PG
682 # init index
683 index_name = self.index_name_func(True)
684 index_path = os.path.join(backup_path, index_name)
685 index_sink = self.open_auxiliary_file(index_path, 'w')
e82f14f5 686
d5361dac
ERE
687 cwd = os.getcwd()
688
b7c47f38 689 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
0708a374
ERE
690 '''
691 Handles the new volumes
692 '''
d5361dac
ERE
693 volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
694 volume_path = os.path.join(backup_path, volume_name)
11684b1d 695 deltarobj.vol_no = volume_number
d5361dac
ERE
696
697 # we convert relative paths into absolute because CWD is changed
698 if not os.path.isabs(volume_path):
699 volume_path = os.path.join(cwd, volume_path)
11684b1d 700
8e019196
ERE
701 if tarobj.fileobj is not None:
702 tarobj.fileobj.close()
703
b008f989
ERE
704 deltarobj.logger.debug("opening volume %s" % volume_path)
705
b7c47f38 706 tarobj.open_volume(volume_path, encryption=encryption)
d5361dac
ERE
707
708 # wraps some args from context into the handler
133d30da 709 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
0708a374 710
774ca538 711 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
6c678f3a 712
be60ffd0 713 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
6c678f3a 714 # calculate checksum and write into the stream
c2ffe2ec 715 crc = binascii.crc32(s) & 0xFFFFffff
774ca538 716 index_sink.write(s)
e82f14f5 717
0708a374
ERE
718 # start creating the tarfile
719 tarobj = tarfile.TarFile.open(tarfile_path,
da26094a 720 mode='w' + self.mode,
0708a374 721 format=tarfile.GNU_FORMAT,
d1c38f40 722 concat='#' in self.mode,
133d30da 723 encryption=self.encryptor,
0708a374 724 max_volume_size=max_volume_size,
ea625b04 725 new_volume_handler=new_volume_handler,
e2b59b34
ERE
726 save_to_members=False,
727 dereference=True)
e5c6ca04 728 os.chdir(source_path)
55b8686d
ERE
729
730 # for each file to be in the backup, do:
e82f14f5 731 for path in self._recursive_walk_dir('.'):
55b8686d 732 # calculate stat dict for current file
253d4cdd
ERE
733 statd = self._stat_dict(path)
734 statd['path'] = u'snapshot://' + statd['path']
735 statd['volume'] = self.vol_no
55b8686d
ERE
736
737 # backup file
253d4cdd 738 tarobj.add(path, arcname = statd['path'], recursive=False)
11684b1d 739
55b8686d 740 # retrieve file offset
253d4cdd 741 statd['offset'] = tarobj.get_last_member_offset()
b008f989 742 self.logger.debug("backup %s" % statd['path'])
6c678f3a 743
d041935c 744 # store the stat dict in the index
be60ffd0 745 s = bytes(json.dumps(statd) + '\n', 'UTF-8')
6c678f3a 746 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 747 index_sink.write(s)
e82f14f5 748
be60ffd0 749 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
6c678f3a 750 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 751 index_sink.write(s)
be60ffd0 752 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
774ca538
PG
753 index_sink.write(s)
754
e5c6ca04 755 os.chdir(cwd)
0708a374 756 tarobj.close()
c8c72fe1 757 index_sink.close (close_fileobj=True)
938c2d54 758
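A usage sketch for the method above, with hypothetical paths and 100 MB volumes:

    import logging
    from deltatar.deltatar import DeltaTar

    dtar = DeltaTar(mode='#gz', password='example-passphrase',
                    logger=logging.StreamHandler())
    dtar.create_full_backup(source_path='/srv/data',
                            backup_path='/mnt/backups/2014-01-23',
                            max_volume_size=100,              # megabytes per volume
                            extra_data={'origin': 'host01'})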
0708a374 759 def create_diff_backup(self, source_path, backup_path, previous_index_path,
d4a05db6 760 max_volume_size=None, extra_data=dict()):
0708a374
ERE
761 '''
762 Creates a differential backup.
763
764 Parameters:
765 - source_path: source path to the directory to back up.
766 - backup_path: path where the backup will be stored. Backup path will
767 be created if not existent.
768 - previous_index_path: index of the previous backup, needed to know
769 which files changed since then.
770 - max_volume_size: maximum volume size in megabytes (MB). Used to split
771 the backup in volumes. Optional (won't split in volumes by default).
3a7e1a50
ERE
772
773 NOTE: previous index is assumed to follow exactly the same format as
774 the index_mode setup in the constructor.
0708a374 775 '''
d07c8065 776 # check/sanitize input
be60ffd0 777 if not isinstance(source_path, str):
d07c8065
ERE
778 raise Exception('Source path must be a string')
779
be60ffd0 780 if not isinstance(backup_path, str):
d07c8065
ERE
781 raise Exception('Backup path must be a string')
782
783 if not os.path.exists(source_path) or not os.path.isdir(source_path):
784 raise Exception('Source path "%s" does not exist or is not a '\
785 'directory' % source_path)
786
9eae9a1f
ERE
787 if not isinstance(extra_data, dict):
788 raise Exception('extra_data must be a dictionary')
789
790 try:
791 extra_data_str = json.dumps(extra_data)
792 except:
793 raise Exception('extra_data is not json-serializable')
794
d07c8065
ERE
795 if not os.access(source_path, os.R_OK):
796 raise Exception('Source path "%s" is not readable' % source_path)
797
798 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
799 max_volume_size < 1):
800 raise Exception('max_volume_size must be a positive integer')
801 if max_volume_size != None:
802 max_volume_size = max_volume_size*1024*1024
803
be60ffd0 804 if not isinstance(previous_index_path, str):
d07c8065
ERE
805 raise Exception('previous_index_path must be a string')
806
807 if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
808 raise Exception('Index path "%s" does not exist or is not a '\
809 'file' % previous_index_path)
810
811 if not os.access(previous_index_path, os.R_OK):
812 raise Exception('Index path "%s" is not readable' % previous_index_path)
813
814 # try to create backup path if needed
815 if not os.path.exists(backup_path):
d4a05db6 816 os.makedirs(backup_path)
d07c8065
ERE
817
818 if not os.access(backup_path, os.W_OK):
819 raise Exception('Backup path "%s" is not writeable' % backup_path)
820
821 if source_path.endswith('/'):
822 source_path = source_path[:-1]
823
824 if backup_path.endswith('/'):
825 backup_path = backup_path[:-1]
826
827 # update current time
828 self.current_time = datetime.datetime.now()
829
830 if self.mode not in self.__file_extensions_dict:
831 raise Exception('Unrecognized extension')
832
2ae46844 833 # setup for encrypting payload
774ca538
PG
834 if self.encryptor is None:
835 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 836
d07c8065
ERE
837 # some initialization
838 self.vol_no = 0
839
840 # generate the first volume name
df86af81
ERE
841 vol_name = self.volume_name_func(backup_path, is_full=False,
842 volume_number=0)
d07c8065
ERE
843 tarfile_path = os.path.join(backup_path, vol_name)
844
938c2d54 845 # init index
d07c8065
ERE
846 cwd = os.getcwd()
847
3031b7ae
PG
848 index_name = self.index_name_func(is_full=False)
849 index_path = os.path.join(backup_path, index_name)
850 index_sink = self.open_auxiliary_file(index_path, 'w')
851
d07c8065
ERE
852 def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
853 '''
854 Handles the new volumes
855 '''
df86af81
ERE
856 volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
857 volume_number=volume_number)
d07c8065
ERE
858 volume_path = os.path.join(backup_path, volume_name)
859 deltarobj.vol_no = volume_number
860
861 # we convert relative paths into absolute because CWD is changed
862 if not os.path.isabs(volume_path):
863 volume_path = os.path.join(cwd, volume_path)
864
f624ff3d 865 deltarobj.logger.debug("opening volume %s" % volume_path)
d07c8065
ERE
866 tarobj.open_volume(volume_path)
867
868 # wraps some args from context into the handler
869 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
870
3031b7ae 871 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
d07c8065 872
be60ffd0 873 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
d07c8065 874 # calculate checksum and write into the stream
c2ffe2ec 875 crc = binascii.crc32(s) & 0xFFFFffff
3031b7ae 876 index_sink.write(s)
d07c8065
ERE
877
878 # start creating the tarfile
879 tarobj = tarfile.TarFile.open(tarfile_path,
880 mode='w' + self.mode,
881 format=tarfile.GNU_FORMAT,
d1c38f40 882 concat='#' in self.mode,
133d30da 883 encryption=self.encryptor,
d07c8065 884 max_volume_size=max_volume_size,
ea625b04 885 new_volume_handler=new_volume_handler,
e2b59b34
ERE
886 save_to_members=False,
887 dereference=True)
d07c8065 888
aae127d0
ERE
889
890 # create the iterators, first the previous index iterator, then the
891 # source path directory iterator and collate and iterate them
892 if not os.path.isabs(previous_index_path):
893 previous_index_path = os.path.join(cwd, previous_index_path)
894 index_it = self.iterate_index_path(previous_index_path)
895
d07c8065 896 os.chdir(source_path)
aae127d0
ERE
897 dir_it = self._recursive_walk_dir('.')
898 dir_path_it = self.jsonize_path_iterator(dir_it)
d07c8065 899
df86af81
ERE
900 def pr(path):
901 if not path:
902 return "None"
903 else:
904 return path["path"]
8edb2e3c 905
d07c8065 906 # for each file to be in the backup, do:
df86af81 907 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
aae127d0
ERE
908 action = None
909 # if file is not in the index, it means it's a new file, so we have
910 # to take a snapshot
df86af81 911
aae127d0
ERE
912 if not ipath:
913 action = 'snapshot'
914 # if the file is not in the directory iterator, it means that it has
d041935c 915 # been deleted, so we need to mark it as such
aae127d0
ERE
916 elif not dpath:
917 action = 'delete'
918 # if the file is in both iterators, it means it might have either
919 # not changed (in which case we will just list it in our index but
920 # it will not be included in the tar file), or it might have
e8d95fe5 921 # changed, in which case we will snapshot it.
aae127d0
ERE
922 elif ipath and dpath:
923 if self._equal_stat_dicts(ipath, dpath):
924 action = 'list'
925 else:
926 action = 'snapshot'
927 # TODO: when creating chained backups (i.e. diffing from another
928 # diff), we will need to detect the type of action in the previous
929 # index, because if it was delete and dpath is None, we should
930 # discard the file
931
932 if action == 'snapshot':
933 # calculate stat dict for current file
934 stat = dpath.copy()
be60ffd0 935 stat['path'] = "snapshot://" + dpath['path']
aae127d0
ERE
936 stat['volume'] = self.vol_no
937
50f43227
ERE
938 self.logger.debug("[STORE] %s" % dpath['path'])
939
aae127d0 940 # backup file
8adbe50d 941 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
aae127d0
ERE
942
943 # retrieve file offset
944 stat['offset'] = tarobj.get_last_member_offset()
aae127d0 945 elif action == 'delete':
50f43227 946 path = self.unprefixed(ipath['path'])
aae127d0 947 stat = {
50f43227 948 u'path': u'delete://' + path,
aae127d0
ERE
949 u'type': ipath['type']
950 }
50f43227 951 self.logger.debug("[DELETE] %s" % path)
aae127d0
ERE
952
953 # mark it as deleted in the backup
42d39ca7 954 tarobj.add("/dev/null", arcname=stat['path'])
aae127d0
ERE
955 elif action == 'list':
956 stat = dpath.copy()
50f43227
ERE
957 path = self.unprefixed(ipath['path'])
958 stat['path'] = u'list://' + path
aae127d0 959 # unchanged files do not enter in the backup, only in the index
50f43227 960 self.logger.debug("[UNCHANGED] %s" % path)
80910564
TJ
961 else:
962 # should not happen
4bda6f45 963 self.logger.warning('unknown action in create_diff_backup: {0}'
80910564
TJ
964 ''.format(action))
965 stat = None
aae127d0 966
80910564
TJ
967 if stat:
968 # store the stat dict in the index
be60ffd0 969 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
aae127d0 970 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 971 index_sink.write(s)
aae127d0 972
be60ffd0 973 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
aae127d0 974 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 975 index_sink.write(s)
be60ffd0 976 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
3031b7ae 977 index_sink.write(s)
938c2d54 978
df86af81 979 index_it.release()
aae127d0
ERE
980 os.chdir(cwd)
981 tarobj.close()
938c2d54
PG
982 index_sink.close()
983
984
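A sketch of chaining a diff backup onto a previous run; the paths and the index file name are hypothetical:

    dtar = DeltaTar(mode='#gz', password='example-passphrase')
    dtar.create_diff_backup(
        source_path='/srv/data',
        backup_path='/mnt/backups/2014-01-24',
        previous_index_path='/mnt/backups/2014-01-23/'
                            'bfull-2014-01-23-1430.index.gz.pdtcrypt',
        max_volume_size=100)
    # Unchanged files are merely listed (list://), new or modified files are
    # stored again (snapshot://), and vanished files are recorded as delete://.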
d07c8065 985 def iterate_index_path(self, index_path):
df86af81
ERE
986 '''
987 Returns an index iterator. Internally, it uses a classic iterator class.
988 We do that instead of just yielding so that the iterator object can have
989 an additional function to close the file descriptor that is opened in
990 the constructor.
991 '''
d07c8065 992
df86af81
ERE
993 class IndexPathIterator(object):
994 def __init__(self, delta_tar, index_path):
995 self.delta_tar = delta_tar
996 self.index_path = index_path
997 self.f = None
9eae9a1f 998 self.extra_data = dict()
df86af81 999 self.__enter__()
d07c8065 1000
df86af81
ERE
1001 def __iter__(self):
1002 return self
d07c8065 1003
df86af81
ERE
1004 def release(self):
1005 if self.f:
1006 self.f.close()
1007
1008 def __enter__(self):
1009 '''
1010 Allows this iterator to be used with the "with" statement
1011 '''
1012 if self.f is None:
9eccb1c2 1013 self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
df86af81
ERE
1014 # check index header
1015 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1016 if j.get("type", '') != 'python-delta-tar-index' or\
1017 j.get('version', -1) != 1:
1018 raise Exception("invalid index file format: %s" % json.dumps(j))
1019
9eae9a1f
ERE
1020 self.extra_data = j.get('extra_data', dict())
1021
df86af81
ERE
1022 # find BEGIN-FILE-LIST, ignore other headers
1023 while True:
1024 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1025 if j.get('type', '') == 'BEGIN-FILE-LIST':
1026 break
1027 return self
1028
1029 def __exit__(self, type, value, tb):
1030 '''
1031 Allows this iterator to be used with the "with" statement
1032 '''
ec57ce53
ERE
1033 if self.f:
1034 self.f.close()
df86af81 1035 self.f = None
d07c8065 1036
be60ffd0 1037 def __next__(self):
0349168a 1038 # read each file in the index and process it to do the restore
df86af81
ERE
1039 j = {}
1040 l_no = -1
1041 try:
1042 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
be60ffd0 1043 except Exception as e:
df86af81
ERE
1044 if self.f:
1045 self.f.close()
1046 raise e
d07c8065 1047
df86af81 1048 op_type = j.get('type', '')
d07c8065 1049
df86af81
ERE
1050 # when we detect the end of the list, break the loop
1051 if op_type == 'END-FILE-LIST':
1052 if self.f:
1053 self.f.close()
1054 raise StopIteration
1055
1056 # check input
1057 if op_type not in ['directory', 'file', 'link']:
4bda6f45 1058 self.delta_tar.logger.warning('unrecognized type to be '
df86af81
ERE
1059 'restored: %s, line %d' % (op_type, l_no))
1060 # iterate again
be60ffd0 1061 return self.__next__()
df86af81
ERE
1062
1063 return j, l_no
d07c8065 1064
df86af81 1065 return IndexPathIterator(self, index_path)
d07c8065 1066
26fdd428 1067 def iterate_tar_path(self, tar_path, new_volume_handler=None):
24ddf0a2
ERE
1068 '''
1069 Returns a tar iterator that iterates jsonized member items that contain
1070 an additional "member" field, used by RestoreHelper.
1071 '''
ec57ce53 1072 class TarPathIterator(object):
83a81852 1073 def __init__(self, delta_tar, tar_path, new_volume_handler=None):
24ddf0a2 1074 self.delta_tar = delta_tar
ec57ce53 1075 self.tar_path = tar_path
24ddf0a2 1076 self.tar_obj = None
6bca471c 1077 self.last_member = None
26fdd428 1078 self.new_volume_handler = new_volume_handler
24ddf0a2
ERE
1079 self.__enter__()
1080
1081 def __iter__(self):
1082 return self
1083
1084 def release(self):
1085 if self.tar_obj:
1086 self.tar_obj.close()
1087
1088 def __enter__(self):
1089 '''
1090 Allows this iterator to be used with the "with" statement
1091 '''
1092 if self.tar_obj is None:
d5e1d60f
PG
1093 decryptor = None
1094 if self.delta_tar.password is not None:
1f3fd7b0
PG
1095 decryptor = crypto.Decrypt \
1096 (password=self.delta_tar.password,
1097 key=self.delta_tar.crypto_key)
ec57ce53
ERE
1098 self.tar_obj = tarfile.TarFile.open(self.tar_path,
1099 mode='r' + self.delta_tar.mode,
1100 format=tarfile.GNU_FORMAT,
d1c38f40 1101 concat='#' in self.delta_tar.mode,
d5e1d60f 1102 encryption=decryptor,
83a81852 1103 new_volume_handler=self.new_volume_handler,
e2b59b34
ERE
1104 save_to_members=False,
1105 dereference=True)
24ddf0a2
ERE
1106 return self
1107
1108 def __exit__(self, type, value, tb):
1109 '''
1110 Allows this iterator to be used with the "with" statement
1111 '''
ec57ce53
ERE
1112 if self.tar_obj:
1113 self.tar_obj.close()
24ddf0a2
ERE
1114 self.tar_obj = None
1115
be60ffd0 1116 def __next__(self):
24ddf0a2
ERE
1117 '''
1118 Read each member and return it as a stat dict
1119 '''
be60ffd0 1120 tarinfo = self.tar_obj.__iter__().__next__()
8e019196
ERE
1121 # NOTE: here we compare if tarinfo.path is the same as before
1122 # instead of comparing the tarinfo object itself because the
1123 # object itself might change for multivol tarinfos
1124 if tarinfo is None or (self.last_member is not None and\
1125 self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
ec57ce53
ERE
1126 raise StopIteration
1127
6bca471c
ERE
1128 self.last_member = tarinfo
1129
24ddf0a2
ERE
1130 ptype = 'unknown'
1131 if tarinfo.isfile():
1132 ptype = 'file'
1133 elif tarinfo.isdir():
ab7e7465 1134 ptype = 'directory'
24ddf0a2
ERE
1135 elif tarinfo.islnk() or tarinfo.issym():
1136 ptype = 'link'
1137
1138 return {
1139 u'type': ptype,
1140 u'path': tarinfo.path,
1141 u'mode': tarinfo.mode,
1142 u'mtime': tarinfo.mtime,
1143 u'ctime': -1, # cannot restore
1144 u'uid': tarinfo.uid,
1145 u'gid': tarinfo.gid,
1146 u'inode': -1, # cannot restore
1147 u'size': tarinfo.size,
1148 u'member': tarinfo
ec57ce53
ERE
1149 }, 0
1150
26fdd428 1151 return TarPathIterator(self, tar_path, new_volume_handler)
24ddf0a2 1152
df99a044 1153 def jsonize_path_iterator(self, iter, strip=0):
d07c8065
ERE
1154 '''
1155 converts the yielded items of an iterator into json path lines.
df99a044
ERE
1156
1157 strip: Strip the smallest prefix containing num leading slashes from
1158 the file path.
d07c8065
ERE
1159 '''
1160 while True:
1161 try:
be60ffd0 1162 path = iter.__next__()
df99a044 1163 if strip == 0:
4ac6d333 1164 yield self._stat_dict(path), 0
df99a044
ERE
1165 else:
1166 st = self._stat_dict(path)
1167 st['path'] = "/".join(path.split("/")[strip:])
4ac6d333 1168 yield st, 0
d07c8065
ERE
1169 except StopIteration:
1170 break
1171
1172 def collate_iterators(self, it1, it2):
1173 '''
1174 Collate two iterators, so that it returns pairs of the items of each
1175 iterator (if the items are the same), or (None, elem2) or (elem1, None)
1176 when there's no match for the items in the other iterator.
1177
1178 It assumes that the items in both lists are ordered in the same way.
1179 '''
ea6d3c3e 1180 l_no = 0
d07c8065
ERE
1181 elem1, elem2 = None, None
1182 while True:
1183 if not elem1:
1184 try:
be60ffd0 1185 elem1, l_no = it1.__next__()
d07c8065
ERE
1186 except StopIteration:
1187 if elem2:
ea6d3c3e 1188 yield (None, elem2, l_no)
d07c8065 1189 for elem2 in it2:
ea6d3c3e
ERE
1190 if isinstance(elem2, tuple):
1191 elem2 = elem2[0]
1192 yield (None, elem2, l_no)
d07c8065 1193 break
d07c8065
ERE
1194 if not elem2:
1195 try:
be60ffd0 1196 elem2 = it2.__next__()
d07c8065
ERE
1197 if isinstance(elem2, tuple):
1198 elem2 = elem2[0]
1199 except StopIteration:
1200 if elem1:
ea6d3c3e 1201 yield (elem1, None, l_no)
df99a044 1202 for elem1, l_no in it1:
ea6d3c3e 1203 yield (elem1, None, l_no)
d07c8065 1204 break
670f9934
ERE
1205
1206 index1 = self.unprefixed(elem1['path'])
1207 index2 = self.unprefixed(elem2['path'])
1208 i1, i2 = self.compare_indexes(index1, index2)
1209
1210 yield1 = yield2 = None
1211 if i1 is not None:
1212 yield1 = elem1
1213 elem1 = None
1214 if i2 is not None:
1215 yield2 = elem2
1216 elem2 = None
1217 yield (yield1, yield2, l_no)
1218
1219 def compare_indexes(self, index1, index2):
1220 '''
1221 Compare iterator indexes and return a tuple in the following form:
1222 if index1 < index2, returns (index1, None)
1223 if index1 == index2 returns (index1, index2)
1224 else: returns (None, index2)
1225 '''
1226 l1 = index1.split('/')
1227 l2 = index2.split('/')
1228 length = len(l2) - len(l1)
1229
1230 if length > 0:
1231 return (index1, None)
1232 elif length < 0:
1233 return (None, index2)
1234
1235 for i1, i2 in zip(l1, l2):
1236 if i1 < i2:
1237 return (index1, None)
1238 elif i1 > i2:
1239 return (None, index2)
1240
1241 return (index1, index2)
0708a374 1242
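compare_indexes() orders paths first by depth and then component-wise, which is what keeps collate_iterators() in step; a few illustrative cases, reusing a dtar instance:

    dtar.compare_indexes('a/b', 'a/b')  # ('a/b', 'a/b')  equal, both advance
    dtar.compare_indexes('a/b', 'a/c')  # ('a/b', None)   'b' sorts before 'c'
    dtar.compare_indexes('z', 'a/b')    # ('z', None)     shallower path first
    dtar.compare_indexes('a/c', 'a/b')  # (None, 'a/b')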
8c65a2b1 1243 def list_backup(self, backup_tar_path, list_func=None):
be60ffd0 1244 if not isinstance(backup_tar_path, str):
8c65a2b1
ERE
1245 raise Exception('Backup tar path must be a string')
1246
1247 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1248 raise Exception('Source path "%s" does not exist or is not a '\
1249 'file' % backup_tar_path)
1250
1251 if not os.access(backup_tar_path, os.R_OK):
1252 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1253
1254 cwd = os.getcwd()
1255
b7c47f38 1256 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
8c65a2b1
ERE
1257 '''
1258 Handles the new volumes
1259 '''
1260 volume_name = deltarobj.volume_name_func(backup_path, True,
1261 volume_number, guess_name=True)
1262 volume_path = os.path.join(backup_path, volume_name)
1263
1264 # we convert relative paths into absolute because CWD is changed
1265 if not os.path.isabs(volume_path):
1266 volume_path = os.path.join(cwd, volume_path)
b7c47f38
PG
1267 tarobj.open_volume(volume_path, encryption=encryption)
1268
774ca538
PG
1269 if self.decryptor is None:
1270 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
8c65a2b1
ERE
1271
1272 backup_path = os.path.dirname(backup_tar_path)
1273 if not os.path.isabs(backup_path):
1274 backup_path = os.path.join(cwd, backup_path)
133d30da 1275 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)
b7a6566b 1276
8c65a2b1
ERE
1277 tarobj = tarfile.TarFile.open(backup_tar_path,
1278 mode='r' + self.mode,
1279 format=tarfile.GNU_FORMAT,
d1c38f40 1280 concat='#' in self.mode,
133d30da 1281 encryption=self.decryptor,
ea625b04 1282 new_volume_handler=new_volume_handler,
e2b59b34
ERE
1283 save_to_members=False,
1284 dereference=True)
8c65a2b1
ERE
1285
1286 def filter(cls, list_func, tarinfo):
1287 if list_func is None:
b008f989 1288 self.logger.info(tarinfo.path)
8c65a2b1
ERE
1289 else:
1290 list_func(tarinfo)
1291 return False
1292 filter = partial(filter, self, list_func)
1293
1294 tarobj.extractall(filter=filter)
1295 tarobj.close()
1296
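A sketch of listing a volume; when a list_func is given it receives one tarfile.TarInfo per member instead of the default info-level logging (volume name hypothetical):

    members = []
    dtar.list_backup('/mnt/backups/2014-01-23/'
                     'bfull-2014-01-23-1430-001.tar.gz.pdtcrypt',
                     list_func=members.append)
    for m in members:
        print(m.path, m.size)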
0708a374 1297 def restore_backup(self, target_path, backup_indexes_paths=[],
e93f83f1 1298 backup_tar_path=None, restore_callback=None,
04f4c7ab 1299 disaster=tarfile.TOLERANCE_STRICT):
0708a374
ERE
1300 '''
1301 Restores a backup.
1302
1303 Parameters:
0708a374
ERE
1304 - target_path: path to restore.
1305 - backup_indexes_paths: path to backup indexes, in descending date order.
1306 The indexes indicate the location of their respective backup volumes,
1307 and multiple indexes are needed to be able to restore diff backups.
1308 Note that this is an optional parameter: if not supplied, it will
1309 try to restore directly from backup_tar_path.
1310 - backup_tar_path: path to the backup tar file. Used as an alternative
1311 to backup_indexes_paths to restore directly from a tar file without
1312 using any file index. If it's a multivol tarfile, volume_name_func
1313 will be called.
4da27cfe 1314 - restore_callback: callback function to be called during restore.
b0aef801 1315 This is passed to the helper and gets called for every file.
11684b1d 1316
3a7e1a50 1317 NOTE: If you want to use an index to restore a backup, this function
11684b1d
ERE
1318 only supports doing so when the tarfile mode is either uncompressed or
1319 uses concat compression mode, because otherwise it would be very slow.
3a7e1a50
ERE
1320
1321 NOTE: Indices are assumed to follow the same format as the index_mode
1322 specified in the constructor.
e93f83f1
PG
1323
1324 Returns the list of files that could not be restored, if there were
1325 any.
0708a374 1326 '''
11684b1d 1327 # check/sanitize input
be60ffd0 1328 if not isinstance(target_path, str):
e5c6ca04
ERE
1329 raise Exception('Target path must be a string')
1330
11684b1d
ERE
1331 if not backup_indexes_paths and backup_tar_path is None:
1332 raise Exception("You have to either provide index paths or a tar path")
e5c6ca04 1333
ea6d3c3e
ERE
1334 if len(backup_indexes_paths) == 0:
1335 mode = "tar"
1336 else:
1337 mode = "diff"
1338
1339 if mode == "tar":
be60ffd0 1340 if not isinstance(backup_tar_path, str):
11684b1d
ERE
1341 raise Exception('Backup tar path must be a string')
1342
1343 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1344 raise Exception('Source path "%s" does not exist or is not a '\
1345 'file' % backup_tar_path)
1346
1347 if not os.access(backup_tar_path, os.R_OK):
1348 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1349 else:
1350 if not isinstance(backup_indexes_paths, list):
1351 raise Exception('backup_indexes_paths must be a list')
1352
1353 if self.mode.startswith(':') or self.mode.startswith('|'):
1354 raise Exception('Restore only supports either uncompressed tars'
1355 ' or concat compression when restoring from an index, and '
1356 ' the open mode you provided is "%s"' % self.mode)
1357
1358 for index in backup_indexes_paths:
be60ffd0 1359 if not isinstance(index, str):
11684b1d 1360 raise Exception('indices must be strings')
e5c6ca04 1361
11684b1d
ERE
1362 if not os.path.exists(index) or not os.path.isfile(index):
1363 raise Exception('Index path "%s" does not exist or is not a '\
1364 'file' % index)
1365
1366 if not os.access(index, os.R_OK):
1367 raise Exception('Index path "%s" is not readable' % index)
e5c6ca04
ERE
1368
1369 # try to create backup path if needed
1370 if not os.path.exists(target_path):
1371 os.makedirs(target_path)
1372
ec57ce53
ERE
1373 # make backup_tar_path absolute so that iterate_tar_path works fine
1374 if backup_tar_path and not os.path.isabs(backup_tar_path):
1375 backup_tar_path = os.path.abspath(backup_tar_path)
1376
d5361dac 1377 cwd = os.getcwd()
ec57ce53 1378 os.chdir(target_path)
d5361dac 1379
2ae46844 1380 # setup for decrypting payload
774ca538
PG
1381 if self.decryptor is None:
1382 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
2ae46844 1383
ea6d3c3e 1384 if mode == 'tar':
24ddf0a2
ERE
1385 index_it = self.iterate_tar_path(backup_tar_path)
1386 helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
ec57ce53 1387 tarobj=index_it.tar_obj)
ea6d3c3e 1388 elif mode == "diff":
04f4c7ab
PG
1389 helper = RestoreHelper(self, cwd, backup_indexes_paths,
1390 disaster=disaster)
f3d10816
PG
1391 try:
1392 # get iterator from newest index at _data[0]
1393 index1 = helper._data[0]["path"]
1394 index_it = self.iterate_index_path(index1)
1395 except tarfile.DecryptionError as exn:
1396 self.logger.error("failed to decrypt file [%s]: %s; is this an "
afc87ebc
PG
1397 "actual encrypted index file?"
1398 % (index1, str (exn)))
1399 return [(index1, exn)]
1400 except Exception as exn:
1401 # compressed files
1402 self.logger.error("failed to read file [%s]: %s; is this an "
1403 "actual index file?" % (index1, str (exn)))
f3d10816 1404 return [(index1, exn)]
d07c8065 1405
24ddf0a2
ERE
1406 dir_it = self._recursive_walk_dir('.')
1407 dir_path_it = self.jsonize_path_iterator(dir_it)
11684b1d 1408
e93f83f1
PG
1409 failed = [] # irrecoverable files
1410
a395759e 1411 # for each file to be restored, do:
24ddf0a2
ERE
1412 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
1413 if not ipath:
1414 upath = dpath['path']
1415 op_type = dpath['type']
1416 else:
1417 upath = self.unprefixed(ipath['path'])
1418 op_type = ipath['type']
42c04ead 1419
24ddf0a2 1420 # filter paths
75059f3c 1421 if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
24ddf0a2 1422 continue
ea6d3c3e 1423
24ddf0a2
ERE
1424 # if types of the file mismatch, the file needs to be deleted
1425 # and re-restored
1426 if ipath is not None and dpath is not None and\
1427 dpath['type'] != ipath['type']:
1428 helper.delete(upath)
1429
1430 # if file not found in dpath, we can directly restore from index
1431 if not dpath:
1432 # if the file doesn't exist and it needs to be deleted, it
1433 # means that work is already done
1434 if ipath['path'].startswith('delete://'):
ea6d3c3e 1435 continue
24ddf0a2 1436 try:
b008f989 1437 self.logger.debug("restore %s" % ipath['path'])
4da27cfe 1438 helper.restore(ipath, l_no, restore_callback)
be60ffd0 1439 except Exception as e:
e93f83f1 1440 iipath = ipath.get ("path", "")
7b07645e 1441 self.logger.error("FAILED to restore: {} ({})"
e93f83f1 1442 .format(iipath, e))
04f4c7ab 1443 if disaster != tarfile.TOLERANCE_STRICT:
e93f83f1 1444 failed.append ((iipath, e))
24ddf0a2 1445 continue
11684b1d 1446
24ddf0a2
ERE
1447 # if both files are equal, we have nothing to restore
1448 if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
1449 continue
1450
1451 # we have to restore the file, but first we need to delete the
1452 # current existing file.
1453 # we don't delete the file if it's a directory, because it might
1454 # just have changed mtime, so it's quite inefficient to remove
1455 # it
1456 if ipath:
1457 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
42c04ead 1458 helper.delete(upath)
b008f989 1459 self.logger.debug("restore %s" % ipath['path'])
e93f83f1
PG
1460 try:
1461 helper.restore(ipath, l_no, restore_callback)
1462 except Exception as e:
04f4c7ab 1463 if disaster == tarfile.TOLERANCE_STRICT:
e93f83f1
PG
1464 raise
1465 failed.append ((ipath.get ("path", ""), e))
1466 continue
24ddf0a2
ERE
1467
1468 # if the file is not in the index (so it comes from the target
1469 # directory) then we have to delete it
1470 else:
c9d47a03 1471 self.logger.debug("delete %s" % upath)
24ddf0a2 1472 helper.delete(upath)
42c04ead 1473
ec57ce53
ERE
1474 helper.restore_directories_permissions()
1475 index_it.release()
1476 os.chdir(cwd)
1477 helper.cleanup()
ea6d3c3e 1478
e93f83f1
PG
1479 return failed
1480
1481
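Two ways of driving the method above, with hypothetical paths; indexes go newest first:

    # 1. Restore from an index chain (diff index first, then its full backup).
    failed = dtar.restore_backup(
        target_path='/srv/data.restored',
        backup_indexes_paths=[
            '/mnt/backups/2014-01-24/bdiff-2014-01-24-1430.index.gz.pdtcrypt',
            '/mnt/backups/2014-01-23/bfull-2014-01-23-1430.index.gz.pdtcrypt',
        ])

    # 2. Restore directly from a single tar volume, without any index.
    dtar.restore_backup(target_path='/srv/data.restored',
                        backup_tar_path='/mnt/backups/2014-01-23/'
                                        'bfull-2014-01-23-1430-001.tar.gz.pdtcrypt')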
1482 def recover_backup(self, target_path, backup_indexes_paths=[],
1483 restore_callback=None):
1484 """
1485 Walk the index, extracting objects in disaster mode. Bad files are
1486 reported along with a reason.
1487 """
1488 return self.restore_backup(target_path,
1489 backup_indexes_paths=backup_indexes_paths,
04f4c7ab
PG
1490 disaster=tarfile.TOLERANCE_RECOVER)
1491
1492
1493 def rescue_backup(self, target_path, backup_indexes_paths=[],
1494 restore_callback=None):
1495 """
1496 More aggressive “unfsck” mode: do not rely on the index data as the
1497 files may be corrupt; skim files for header-like information and
1498 attempt to retrieve the data.
1499 """
1500 return self.restore_backup(target_path,
1501 backup_indexes_paths=backup_indexes_paths,
1502 disaster=tarfile.TOLERANCE_RESCUE)
e93f83f1
PG
1503
1504
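The two wrappers above differ only in the tolerance level handed down to the tarfile layer; a sketch of escalating from one to the other (index path hypothetical):

    index_path = '/mnt/backups/2014-01-24/bdiff-2014-01-24-1430.index.gz.pdtcrypt'

    # recover_backup() still walks the index but skips members it cannot read
    # back; rescue_backup() additionally distrusts the index data and skims
    # the volumes themselves for recognizable headers.
    failed = dtar.recover_backup('/srv/data.recovered',
                                 backup_indexes_paths=[index_path])
    if failed:
        failed = dtar.rescue_backup('/srv/data.rescued',
                                    backup_indexes_paths=[index_path])
    for path, reason in failed:
        print('unrecoverable:', path, reason)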
11684b1d
ERE
1505 def _parse_json_line(self, f, l_no):
1506 '''
ee0e095f 1507 Read line from file like object and process it as JSON.
11684b1d
ERE
1508 '''
1509 l = f.readline()
1510 l_no += 1
1511 try:
be60ffd0 1512 j = json.loads(l.decode('UTF-8'))
ee0e095f
PG
1513 except UnicodeDecodeError as e:
1514 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1515 raise Exception \
1516 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1517 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1518 from e
1519 raise Exception \
1520 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1521 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1522 from e
be60ffd0 1523 except ValueError as e:
11684b1d
ERE
1524 raise Exception("error parsing this json line "
1525 "(line number %d): %s" % (l_no, l))
1526 return j, l_no
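# Illustrative calling convention (a sketch, not original code; the index
# path is a hypothetical one): the running line counter is threaded through
# the helper so error messages can point at the offending index line.
#
#     l_no = 0
#     with open("backup_dir/index", "rb") as f:
#         header, l_no = self._parse_json_line(f, l_no)   # l_no is now 1
#         entry,  l_no = self._parse_json_line(f, l_no)   # l_no is now 2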
ea6d3c3e 1527
24ddf0a2 1528
ea6d3c3e
ERE
1529class RestoreHelper(object):
1530 '''
1531 Class used to help restore files from indices
1532 '''
1533
1534 # holds the dicts of data
1535 _data = []
1536
1537 _deltatar = None
1538
1539 _cwd = None
1540
0501fe0a
ERE
1541 # list of directories to be restored. This is done as a last step, see
1542 # tarfile.extractall for details.
1543 _directories = []
1544
04f4c7ab 1545 _disaster = tarfile.TOLERANCE_STRICT
e93f83f1 1546
037994ca 1547 def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
04f4c7ab 1548 tarobj=None, disaster=tarfile.TOLERANCE_STRICT):
ea6d3c3e
ERE
1549 '''
1550 Constructor opens the tars and initializes the data structures.
1551
037994ca
PG
1552 Assumptions:
1553
1554 - Index list must be provided in reverse order (newer first).
1555 - “newer first” apparently means that if there are n backups
1556 provided, the last full backup is at index n-1 and the most recent
1557 diff backup is at index 0.
1558 - Only the first, the second, and the last elements of
1559 ``index_list`` are relevant, others will not be accessed.
1560 - If no ``index_list`` is provided, both ``tarobj`` and
1561 ``backup_path`` must be passed.
1562 - If ``index_list`` is provided, the values of ``tarobj`` and
1563 ``backup_path`` are ignored.
ea6d3c3e
ERE
1564 '''
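# Illustration only (directory names are made up): for one full backup
# followed by two differential backups, the expected call would be
#
#     RestoreHelper(deltatar, cwd,
#                   index_list=["diff2/index",   # newest diff first
#                               "diff1/index",
#                               "full/index"])   # full backup last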
1565 self._data = []
0501fe0a 1566 self._directories = []
ea6d3c3e
ERE
1567 self._deltatar = deltatar
1568 self._cwd = cwd
3031b7ae 1569 self._password = deltatar.password
1f3fd7b0 1570 self._crypto_key = deltatar.crypto_key
3031b7ae 1571 self._decryptors = []
e93f83f1 1572 self._disaster = disaster
ea6d3c3e 1573
253d4cdd
ERE
1574 try:
1575 import grp, pwd
1576 except ImportError:
1577 grp = pwd = None
1578
1579 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1580 self.canchown = True
1581 else:
1582 self.canchown = False
1583
037994ca 1584 if index_list is not None:
24ddf0a2 1585 for index in index_list:
037994ca 1586 is_full = index == index_list[-1]
24ddf0a2 1587
d5e1d60f 1588 decryptor = None
3031b7ae 1589 if self._password is not None:
1f3fd7b0
PG
1590 decryptor = crypto.Decrypt (password=self._password,
1591 key=self._crypto_key)
d5e1d60f 1592
24ddf0a2
ERE
1593 # make paths absolute to avoid cwd problems
1594 if not os.path.isabs(index):
1595 index = os.path.normpath(os.path.join(cwd, index))
1596
1597 s = dict(
1598 curr_vol_no = None,
1599 vol_fd = None,
1600 offset = -1,
1601 tarobj = None,
1602 path = index,
1603 is_full = is_full,
1604 iterator = None,
1605 last_itelement = None,
1606 last_lno = 0,
1607 new_volume_handler = partial(self.new_volume_handler,
1608 self._deltatar, self._cwd, is_full,
d5e1d60f
PG
1609 os.path.dirname(index), decryptor),
1610 decryptor = decryptor
24ddf0a2
ERE
1611 )
1612 self._data.append(s)
1613 else:
ea6d3c3e 1614 # make paths absolute to avoid cwd problems
24ddf0a2
ERE
1615 if not os.path.isabs(backup_path):
1616 backup_path = os.path.normpath(os.path.join(cwd, backup_path))
ea6d3c3e 1617
ec57ce53
ERE
1618 # update the new_volume_handler of tar_obj
1619 tarobj.new_volume_handler = partial(self.new_volume_handler,
b7c47f38 1620 self._deltatar, self._cwd, True, os.path.dirname(backup_path),
133d30da 1621 self._deltatar.decryptor)
ea6d3c3e
ERE
1622 s = dict(
1623 curr_vol_no = None,
1624 vol_fd = None,
1625 offset = -1,
24ddf0a2
ERE
1626 tarobj = tarobj,
1627 path = backup_path,
1628 is_full = True,
670f9934
ERE
1629 iterator = None,
1630 last_itelement = None,
1631 last_lno = 0,
d5e1d60f
PG
1632 new_volume_handler = tarobj.new_volume_handler,
1633 decryptor = self._deltatar.decryptor
ea6d3c3e
ERE
1634 )
1635 self._data.append(s)
1636
3031b7ae 1637
ea6d3c3e
ERE
1638 def cleanup(self):
1639 '''
1640 Closes all open files
1641 '''
1642 for data in self._data:
55b2ffd0
ERE
1643 if data['vol_fd']:
1644 data['vol_fd'].close()
1645 data['vol_fd'] = None
ea6d3c3e
ERE
1646 if data['tarobj']:
1647 data['tarobj'].close()
1648 data['tarobj'] = None
ea6d3c3e
ERE
1649
1650 def delete(self, path):
1651 '''
1652 Delete a file
1653 '''
df99a044
ERE
1654 if not os.path.exists(path):
1655 return
1656
24ddf0a2 1657 # to preserve parent directory mtime, we save it
283fbd5e 1658 parent_dir = os.path.dirname(path) or os.getcwd()
24ddf0a2
ERE
1659 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1660
561bc39f 1661 if os.path.isdir(path) and not os.path.islink(path):
ea6d3c3e
ERE
1662 shutil.rmtree(path)
1663 else:
1664 os.unlink(path)
1665
24ddf0a2
ERE
1666 # now we restore parent_directory mtime
1667 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
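# The mtime juggling above is needed because unlink()/rmtree() bump the
# parent directory's mtime. A minimal standalone sketch of the same idea,
# with hypothetical paths:
#
#     before = int(os.stat("/tmp/parent").st_mtime)
#     os.unlink("/tmp/parent/child")
#     os.utime("/tmp/parent", (before, before))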
1668
4da27cfe 1669 def restore(self, itpath, l_no, callback=None):
ea6d3c3e 1670 '''
8a54d5dd 1671 Restore the path from the appropriate backup. Receives the current path
e8d95fe5 1672 from the newest (=first) index iterator. itpath must not be None.
b0aef801 1673 callback is a custom function that gets called for every file.
037994ca
PG
1674
1675 NB: This function takes the attribute ``_data`` as input but will only
1676 ever use its first and, if available, second element. Anything else in
1677 ``._data[]`` will be ignored.
ea6d3c3e 1678 '''
ea6d3c3e
ERE
1679 path = itpath['path']
1680
4da27cfe
SA
1681 # Calls the callback function
1682 if callback:
1683 callback()
1684
ea6d3c3e 1685 if path.startswith('delete://'):
df86af81
ERE
1686 # the file has previously been deleted already in restore_backup in
1687 # all cases so we just need to finish
ea6d3c3e 1688 return
df86af81 1689
e8d95fe5 1690 # get data from newest index (_data[0])
df86af81
ERE
1691 data = self._data[0]
1692 upath = self._deltatar.unprefixed(path)
1693
24ddf0a2 1694 # to preserve parent directory mtime, we save it
283fbd5e 1695 parent_dir = os.path.dirname(upath) or os.getcwd()
ec57ce53
ERE
1696 if not os.path.exists(parent_dir):
1697 os.makedirs(parent_dir)
24ddf0a2
ERE
1698 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1699
e8d95fe5 1700 # if path is found in the newest index as to be snapshotted, deal with it
df86af81
ERE
1701 # and finish
1702 if path.startswith('snapshot://'):
e93f83f1
PG
1703 try:
1704 self.restore_file(itpath, data, path, l_no, upath)
1705 except Exception:
1706 raise
24ddf0a2
ERE
1707
1708 # now we restore parent_directory mtime
1709 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
ea6d3c3e
ERE
1710 return
1711
1712 # we go from index to index, finding the path in the index, then finding
1713 # the index with the most recent snapshot of the file being restored
e8d95fe5
TJ
1714 #
1715 # Right now we support diff backups only, not incremental backups.
1716 # As a result _data[0] is always the diff backup index
1717 # and _data[1] the full backup index.
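# Summary of the cases handled in this method, as read from the code
# (illustrative only; prefixes are those used in the index files):
#
#     prefix in newest index    action
#     ----------------------    ------------------------------------------
#     delete://                 nothing to do, handled earlier
#     snapshot://               restored above from _data[0] (diff backup)
#     list://                   unchanged file; look it up in _data[1] (full)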
527670c4 1718 if len(self._data) == 2:
7273719c 1719 data = self._data[1]
527670c4
TJ
1720 d, l_no, dpath = self.find_path_in_index(data, upath)
1721 if not d:
1722 self._deltatar.logger.warning('Error restoring file %s from '
1723 'index, not found in index %s' % (path, data['path']))
1724 return
1725
1726 cur_path = d.get('path', '')
1727 if cur_path.startswith('delete://'):
1728 self._deltatar.logger.warning(('Strange thing happened, file '
1729 '%s was listed in first index but deleted by another '
1730 'one. Path was ignored and untouched.') % path)
1731 return
1732 elif cur_path.startswith('snapshot://'):
1733 # this code path is reached when the file is unchanged
1734 # in the newest index and therefore of type 'list://'
1735 self.restore_file(d, data, path, l_no, dpath)
1736
1737 # now we restore parent_directory mtime
1738 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1739 return
1740
1741 # error code path is reached when:
1742 # a) we have more than two indexes (unsupported atm)
1743 # b) both indexes contain a list:// entry (logic error)
1744 # c) we have just one index and it also contains list://
4bda6f45 1745 self._deltatar.logger.warning(('Error restoring file %s from index, '
ea6d3c3e
ERE
1746 'snapshot not found in any index') % path)
1747
670f9934
ERE
1748 def find_path_in_index(self, data, upath):
1749 # NOTE: we sometimes restart the iterator because it may need to be
1750 # walked over completely multiple times, for example if a path is not
1751 # found in one index and we have to go on to the next index.
7273719c
PG
1752 it = data['iterator']
1753 if it is None:
670f9934 1754 it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
be60ffd0 1755 d, l_no = it.__next__()
670f9934 1756 else:
670f9934
ERE
1757 d = data['last_itelement']
1758 l_no = data['last_lno']
1759
670f9934 1760 while True:
7273719c 1761 dpath = self._deltatar.unprefixed(d.get('path', ''))
670f9934
ERE
1762 if upath == dpath:
1763 data['last_itelement'] = d
1764 data['last_lno'] = l_no
1765 return d, l_no, dpath
1766
1767 up, dp = self._deltatar.compare_indexes(upath, dpath)
1768 # if upath should already have appeared before the current dpath, it
1769 # means upath is just not in this index and we should stop
1770 if dp is None:
1771 data['last_itelement'] = d
1772 data['last_lno'] = l_no
1773 return None, 0, ''
1774
1775 try:
be60ffd0 1776 d, l_no = it.__next__()
670f9934
ERE
1777 except StopIteration:
1778 data['last_itelement'] = d
1779 data['last_lno'] = l_no
1780 return None, 0, ''
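# Sketch of the lookup contract (illustrative; ``helper`` and the paths are
# assumptions): the index is walked in sorted order and the position is
# cached between calls, so lookups for already-sorted paths amount to a
# single pass over the index.
#
#     d, l_no, dpath = helper.find_path_in_index(data, "home/user/a")
#     d, l_no, dpath = helper.find_path_in_index(data, "home/user/b")
#     # the second call resumes from data['last_itelement'] instead of
#     # restarting the iterator from the beginning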
670f9934 1781
0501fe0a
ERE
1782 def restore_directories_permissions(self):
1783 '''
1784 Restore directory permissions when everything has been restored
1785 '''
42c04ead
ERE
1786 try:
1787 import grp, pwd
1788 except ImportError:
1789 grp = pwd = None
1790
0501fe0a
ERE
1791 self._directories.sort(key=operator.attrgetter('name'))
1792 self._directories.reverse()
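# (Reverse lexical order means subdirectories come before their parents,
# mirroring tarfile.extractall: a parent's mode and mtime are applied only
# after its children have been handled.)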
0501fe0a
ERE
1793
1794 # Set correct owner, mtime and filemode on directories.
1795 for member in self._directories:
1796 dirpath = member.name
1797 try:
42c04ead
ERE
1798 os.chmod(dirpath, member.mode)
1799 os.utime(dirpath, (member.mtime, member.mtime))
253d4cdd 1800 if self.canchown:
42c04ead
ERE
1801 # We have to be root to do so.
1802 try:
1803 g = grp.getgrnam(member.gname)[2]
1804 except KeyError:
1805 g = member.gid
1806 try:
1807 u = pwd.getpwnam(member.uname)[2]
1808 except KeyError:
1809 u = member.uid
1810 try:
4e433e00 1811 if member.issym and hasattr(os, "lchown"):
42c04ead
ERE
1812 os.lchown(dirpath, u, g)
1813 else:
1814 os.chown(dirpath, u, g)
1815 except EnvironmentError:
1816 raise tarfile.ExtractError("could not change owner")
1817
be60ffd0 1818 except tarfile.ExtractError as e:
4bda6f45 1819 self._deltatar.logger.warning('tarfile: %s' % e)
0501fe0a 1820
df86af81 1821 @staticmethod
b7c47f38 1822 def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
ea6d3c3e
ERE
1823 '''
1824 Handles the new volumes
1825 '''
df86af81
ERE
1826 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1827 volume_number, guess_name=True)
ea6d3c3e
ERE
1828 volume_path = os.path.join(backup_path, volume_name)
1829
1830 # we convert relative paths into absolute because CWD is changed
1831 if not os.path.isabs(volume_path):
1832 volume_path = os.path.join(cwd, volume_path)
b7c47f38 1833 tarobj.open_volume(volume_path, encryption=encryption)
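# How this handler is wired up (a sketch based on __init__ above; names are
# illustrative): the leading arguments are pre-bound with functools.partial,
# so tarfile only supplies (tarobj, base_name, volume_number) when it crosses
# a volume boundary.
#
#     handler = partial(RestoreHelper.new_volume_handler,
#                       deltatar, cwd, is_full, backup_dir, decryptor)
#     tarobj.new_volume_handler = handler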
ea6d3c3e 1834
253d4cdd 1835 def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
ea6d3c3e
ERE
1836 '''
1837 Restores a snapshot of a file from a specific backup
1838 '''
ea6d3c3e 1839 op_type = file_data.get('type', -1)
24ddf0a2 1840 member = file_data.get('member', None)
9f9ae874 1841 ismember = bool(member)
24ddf0a2
ERE
1842
1843 # when member is set, then we can assume everything is right and we
1844 # just have to restore the path
a2a37de7 1845 if member is None:
24ddf0a2
ERE
1846 vol_no = file_data.get('volume', -1)
1847 # sanity check
1848 if not isinstance(vol_no, int) or vol_no < 0:
4bda6f45 1849 self._deltatar.logger.warning('unrecognized type to be restored: '
24ddf0a2
ERE
1850 '%s, line %d' % (op_type, l_no))
1851
1852 # setup the volume that needs to be read. only needed when member is
1853 # not set
a2a37de7 1854 if index_data['curr_vol_no'] != vol_no:
24ddf0a2
ERE
1855 index_data['curr_vol_no'] = vol_no
1856 backup_path = os.path.dirname(index_data['path'])
1857 vol_name = self._deltatar.volume_name_func(backup_path,
1858 index_data['is_full'], vol_no, guess_name=True)
1859 vol_path = os.path.join(backup_path, vol_name)
1860 if index_data['vol_fd']:
1861 index_data['vol_fd'].close()
be60ffd0 1862 index_data['vol_fd'] = open(vol_path, 'rb')
24ddf0a2
ERE
1863
1864 # force reopen of the tarobj because of new volume
1865 if index_data['tarobj']:
1866 index_data['tarobj'].close()
1867 index_data['tarobj'] = None
1868
1869 # seek tarfile if needed
1870 offset = file_data.get('offset', -1)
ea6d3c3e 1871 if index_data['tarobj']:
c6226e2a
PG
1872 try:
1873 member = index_data['tarobj'].__iter__().__next__()
e93f83f1
PG
1874 except tarfile.DecryptionError:
1875 pass
1876 except tarfile.CompressionError:
1877 pass
1878
24ddf0a2
ERE
1879 if not member or member.path != file_data['path']:
1880 # force a seek and reopen
1881 index_data['tarobj'].close()
1882 index_data['tarobj'] = None
1883
1884 # open the tarfile if needed
1885 if not index_data['tarobj']:
1886 index_data['vol_fd'].seek(offset)
1887 index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
1888 fileobj=index_data['vol_fd'],
1889 format=tarfile.GNU_FORMAT,
d1c38f40 1890 concat='#' in self._deltatar.mode,
d5e1d60f 1891 encryption=index_data["decryptor"],
253d4cdd 1892 new_volume_handler=index_data['new_volume_handler'],
044585c6 1893 save_to_members=False,
04f4c7ab 1894 tolerance=self._disaster)
24ddf0a2 1895
be60ffd0 1896 member = index_data['tarobj'].__iter__().__next__()
ea6d3c3e 1897
253d4cdd
ERE
1898 member.path = unprefixed_path
1899 member.name = unprefixed_path
0501fe0a
ERE
1900
1901 if op_type == 'directory':
253d4cdd 1902 self.add_member_dir(member)
0501fe0a 1903 member = copy.copy(member)
be60ffd0 1904 member.mode = 0o0700
0501fe0a 1905
df86af81
ERE
1906 # if it's an existing directory, we don't need to recreate it;
1907 # just set the right permissions, mtime and the like
1908 if os.path.exists(member.path):
1909 return
1910
9f9ae874 1911 if not ismember:
24ddf0a2
ERE
1912 # set current volume number in tarobj, otherwise the extraction of the
1913 # file might fail when trying to extract a multivolume member
1914 index_data['tarobj'].volume_number = index_data['curr_vol_no']
86a6e741 1915
9b13f5c4
PG
1916 def ignore_symlink (member, *_args):
1917 self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
786addd6 1918
ea6d3c3e 1919 # finally, restore the file
9b13f5c4 1920 index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)
253d4cdd
ERE
1921
1922 def add_member_dir(self, member):
1923 '''
1924 Add member dir to be restored at the end
1925 '''
4e433e00 1926 if not self.canchown:
253d4cdd
ERE
1927 self._directories.append(DirItem(name=member.name, mode=member.mode,
1928 mtime=member.mtime))
1929 else:
1930 self._directories.append(DirItem(name=member.name, mode=member.mode,
1931 mtime=member.mtime, gname=member.gname, uname=member.uname,
4e433e00 1932 uid=member.uid, gid=member.gid, issym=member.issym()))
253d4cdd
ERE
1933
1934class DirItem(object):
1935 def __init__(self, **kwargs):
be60ffd0 1936 for k, v in kwargs.items():
9f9ae874 1937 setattr(self, k, v)
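# Illustration only: DirItem is a plain attribute bag used to defer directory
# metadata restoration, e.g.
#
#     d = DirItem(name="some/dir", mode=0o755, mtime=1400000000)
#     assert d.mode == 0o755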