fix access race when traversing the filesystem
[python-delta-tar] / deltatar / deltatar.py
6b2fa38f 1#!/usr/bin/env python3
0708a374 2
51797cd6 3# Copyright (C) 2013, 2014 Intra2net AG
0708a374
ERE
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; either version 3 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU Lesser General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see
17# <http://www.gnu.org/licenses/lgpl-3.0.html>
18
938c2d54
PG
19DELTATAR_HEADER_VERSION = 1
20DELTATAR_PARAMETER_VERSION = 1
3fdea6d4 21
0708a374
ERE
22import logging
23import datetime
6c678f3a 24import binascii
938c2d54 25import io
0501fe0a 26import operator
0708a374 27import os
0501fe0a 28import copy
82de3376 29import shutil
8a8fadda 30import re
e82f14f5
ERE
31import stat
32import json
0708a374
ERE
33from functools import partial
34
35from . import tarfile
2ae46844 36from . import crypto
0708a374 37
0708a374
ERE
38class NullHandler(logging.Handler):
39 def emit(self, record):
40 pass
24ddf0a2
ERE
41
42
0708a374
ERE
43logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
44
974408b5
ERE
45
46# match mode
47NO_MATCH = False
48MATCH = True
49PARENT_MATCH = 2
50
133d30da
PG
51# encryption direction
52CRYPTO_MODE_ENCRYPT = 0
53CRYPTO_MODE_DECRYPT = 1
54
13cc7dfc
PG
55# The canonical extension for encrypted backup files regardless of the actual
56# encryption parameters is “.pdtcrypt”. This is analogous to the encryption
57# header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
58 # Since the introduction of the versioned header there is no longer any need
59# for encoding encryption parameters in the file extensions (“.aes128” and
60# suchlike).
61PDTCRYPT_EXTENSION = "pdtcrypt"
2cdd9faf
PG
62PDT_TYPE_ARCHIVE = 0
63PDT_TYPE_AUX = 1
13cc7dfc 64
9eccb1c2
PG
65AUXILIARY_FILE_INDEX = 0
66AUXILIARY_FILE_INFO = 1
67
0708a374
ERE
68class DeltaTar(object):
69 '''
70 Backup class used to create backups
71 '''
72
73 # list of files to exclude in the backup creation or restore operation. It
74 # can contain python regular expressions.
75 excluded_files = []
76
77 # list of files to include in the backup creation or restore operation. It
78 # can contain python regular expressions. If empty, all files in the source
79 # path will be backed up (when creating a backup) or all the files in the
a83fa4ed 80 # backup will be restored (when restoring a backup), but if included_files
0708a374
ERE
81 # is set then only the files included in the list will be processed.
82 included_files = []
83
84 # custom filter of files to be backed up (or restored). Unused and unset
85 # by default. The function receives a file path and must return a boolean.
86 filter_func = None
87
da26094a
ERE
88 # mode in which the delta will be created (when creating a backup) or
89 # opened (when restoring). Accepts modes analogous to the tarfile library.
90 mode = ""
0708a374
ERE
91
92 # used together with aes modes to encrypt and decrypt backups.
93 password = None
1f3fd7b0
PG
94 crypto_key = None
95 nacl = None
0708a374 96
dbee011c
PG
97 # parameter version to use when encrypting; note that this has no effect
98 # on decryption since the required settings are determined from the headers
54f909ca 99 crypto_version = DELTATAR_HEADER_VERSION
dbee011c
PG
100 crypto_paramversion = None
101
133d30da 102 # when encrypting or decrypting, these hold crypto handlers; created before
2ae46844 103 # establishing the Tarfile stream iff a password or crypto key is supplied.
133d30da
PG
104 encryptor = None
105 decryptor = None
2ae46844 106
0708a374
ERE
107 # python logger object.
108 logger = None
109
3a7e1a50
ERE
110 # specifies the index mode in the same format as @param mode, but without
111 # the ':', '|' or '#' at the beginning. It doesn't make sense to specify
2ae46844 112 # that the index is encrypted if no password is given in the constructor.
3a7e1a50 113 index_mode = None
0708a374
ERE
114
115 # current time for this backup. Used for file names and file creation checks
116 current_time = None
117
9eae9a1f
ERE
118 # extra data to be included in the header of the index file when creating a
119 # backup
120 extra_data = dict()
121
0708a374
ERE
122 # valid tarfile modes and their corresponding default file extension
123 __file_extensions_dict = {
da26094a
ERE
124 '': '',
125 ':': '',
126 ':gz': '.gz',
127 ':bz2': '.bz2',
128 '|': '',
129 '|gz': '.gz',
130 '|bz2': '.bz2',
131 '#gz': '.gz',
6e99d23a
PG
132 '#gz.pdtcrypt': '.gz',
133 '#pdtcrypt': '',
d1c38f40 134 '#': '',
0708a374
ERE
135 }
136
3a7e1a50
ERE
137 # valid index modes and their corresponding default file extension
138 __index_extensions_dict = {
139 '': '',
140 'gz': '.gz',
141 'bz2': '.bz2',
6e99d23a
PG
142 'gz.pdtcrypt': '.gz',
143 'pdtcrypt': '',
3a7e1a50
ERE
144 }
145
8adbe50d
ERE
146 # valid path prefixes
147 __path_prefix_list = [
148 u'snapshot://',
149 u'list://',
150 u'delete://'
151 ]
152
0708a374 153 def __init__(self, excluded_files=[], included_files=[],
da26094a 154 filter_func=None, mode="", password=None,
1f3fd7b0 155 crypto_key=None, nacl=None,
54f909ca 156 crypto_version=DELTATAR_HEADER_VERSION,
dbee011c 157 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
3a7e1a50 158 logger=None, index_mode=None, index_name_func=None,
0708a374
ERE
159 volume_name_func=None):
160 '''
161 Constructor. Configures the diff engine.
162
163 Parameters:
164 - excluded_files: list of files to exclude in the backup creation or
165 restore operation. It can contain python regular expressions.
166
167 - included_files: list of files to include in the backup creation or
168 restore operation. It can contain python regular expressions. If
169 empty, all files in the source path will be backed up (when creating a
170 backup) or all the files in the backup will be restored (when
a83fa4ed 171 restoring a backup), but if included_files is set then only the files
0708a374
ERE
172 included in the list will be processed.
173
174 - filter_func: custom filter of files to be backed up (or restored).
175 Unused and unset by default. The function receives a file path and
176 must return a boolean.
177
178 - mode: mode in which the delta will be created (when creating a backup)
179 or opened (when restoring). Accepts the same modes as the tarfile
180 library. Valid modes are:
181
da26094a
ERE
182 '' open uncompressed
183 ':' open uncompressed
184 ':gz' open with gzip compression
185 ':bz2' open with bzip2 compression
186 '|' open an uncompressed stream of tar blocks
187 '|gz' open a gzip compressed stream of tar blocks
188 '|bz2' open a bzip2 compressed stream of tar blocks
189 '#gz' open a stream of gzip compressed tar blocks
0708a374 190
1f3fd7b0
PG
191 - crypto_key: used to encrypt and decrypt backups. Encryption will
192 be enabled automatically if a key is supplied. Requires a salt to be
193 passed as well.
194
195 - nacl: salt that was used to derive the encryption key for embedding
196 in the PDTCRYPT header. Not needed when decrypting and when
197 encrypting with password.
198
6e99d23a
PG
199 - password: used to encrypt and decrypt backups. Encryption will be
200 enabled automatically if a password is supplied.
0708a374 201
54f909ca
PG
202 - crypto_version: version of the format, determining the kind of PDT
203 object header.
204
dbee011c
PG
205 - crypto_paramversion: optionally request encryption conforming to
206 a specific parameter version. Defaults to the standard PDT value
207 which as of 2017 is the only one available.
208
0708a374
ERE
209 - logger: python logger object. Optional.
210
3a7e1a50 211 - index_mode: specifies the index mode in the same format as @param
6e99d23a
PG
212 mode, but without the ':', '|' or '#' at the beginning. If encryption
213 is requested it will extend to the auxiliary (index, info) files as
214 well. This is an optional parameter that will automatically mimic
215 @param mode by default if not provided. Valid modes are:
3a7e1a50
ERE
216
217 '' open uncompressed
218 'gz' open with gzip compression
219 'bz2' open with bzip2 compression
0708a374
ERE
220
221 - index_name_func: function that sets a custom name for the index file.
2cc6e32b
PG
222 This function receives a flag to indicate whether the name will be
223 used for a full or diff backup. The backup path will be prepended to
224 its return value.
0708a374
ERE
225
226 - volume_name_func: function that defines the name of tar volumes. It
227 receives the backup_path, whether it's a full backup, and the volume
228 number, and must return the name for the corresponding volume. Optional;
229 DeltaTar has default names for tar volumes.
230 '''
231
da26094a 232 if mode not in self.__file_extensions_dict:
8a54d5dd
PG
233 raise Exception('Unrecognized extension mode=[%s] requested for files'
234 % str(mode))
0708a374
ERE
235
236 self.excluded_files = excluded_files
237 self.included_files = included_files
238 self.filter_func = filter_func
239 self.logger = logging.getLogger('deltatar.DeltaTar')
240 if logger:
241 self.logger.addHandler(logger)
242 self.mode = mode
2ae46844 243
1f3fd7b0
PG
244 if crypto_key is not None:
245 self.crypto_key = crypto_key
246 self.nacl = nacl # encryption only
247
2ae46844
PG
248 if password is not None:
249 self.password = password
3a7e1a50 250
54f909ca
PG
251 if crypto_version is not None:
252 self.crypto_version = crypto_version
253
dbee011c
PG
254 if crypto_paramversion is not None:
255 self.crypto_paramversion = crypto_paramversion
256
3a7e1a50
ERE
257 # generate index_mode
258 if index_mode is None:
259 index_mode = ''
6e99d23a 260 if 'gz' in mode:
3a7e1a50
ERE
261 index_mode = "gz"
262 elif 'bz2' in mode:
263 index_mode = "bz2"
264 elif mode not in self.__index_extensions_dict:
8a54d5dd
PG
265 raise Exception('Unrecognized extension mode=[%s] requested for index'
266 % str(mode))
3a7e1a50
ERE
267
268 self.index_mode = index_mode
0708a374
ERE
269 self.current_time = datetime.datetime.now()
270
271 if index_name_func is not None:
272 self.index_name_func = index_name_func
273
274 if volume_name_func is not None:
275 self.volume_name_func = volume_name_func
276
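# Usage sketch (illustrative; paths invented, and it assumes the package is
# importable as ``deltatar``). A gzip concat-compressed full backup driven
# by the defaults above could look like:
#
#     from deltatar.deltatar import DeltaTar
#
#     dtar = DeltaTar(mode='#gz', excluded_files=['var/cache/'])
#     dtar.create_full_backup(source_path='/srv/data',
#                             backup_path='/var/backups/data',
#                             max_volume_size=100)   # split volumes at 100 MB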
e54cfec5 277 def pick_extension(self, kind, mode=None):
2cdd9faf
PG
278 """
279 Choose the extension depending on a) the kind of file given, b) the
280 processing mode, and c) the current encryption settings.
281 """
282 ret = ""
283 if kind == PDT_TYPE_ARCHIVE:
284 ret += ".tar"
e54cfec5
PG
285 if mode is None:
286 mode = self.__index_extensions_dict [self.index_mode]
2cdd9faf 287 ret += mode
a83fa4ed 288 if self.crypto_key is not None or self.password is not None:
2cdd9faf
PG
289 ret += "." + PDTCRYPT_EXTENSION
290 return ret
291
f0287fb7 292 def index_name_func(self, is_full): # pylint: disable=method-hidden
0708a374 293 '''
2cc6e32b
PG
294 Callback for setting a custom name for the index file. Depending on
295 whether *is_full* is set, it will create a suitable name for a full
296 or a diff backup.
0708a374
ERE
297 '''
298 prefix = "bfull" if is_full else "bdiff"
f7940c31 299 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf
PG
300 extension = self.pick_extension \
301 (PDT_TYPE_AUX,
302 self.__index_extensions_dict [self.index_mode])
0708a374 303
da26094a 304 return "%s-%s.index%s" % (prefix, date_str, extension)
0708a374 305
f0287fb7
CH
306 def volume_name_func(self, backup_path, # pylint: disable=method-hidden
307 is_full, volume_number,
308 guess_name=False):
0708a374
ERE
309 '''
310 Function that defines the name of tar volumes. It receives the
311 backup_path, whether it's a full backup, and the volume number, and must
312 return the name for the corresponding volume. Optional; DeltaTar has
313 default names for tar volumes.
df86af81
ERE
314
315 If guess_name is set, the file is intended to be found rather than
316 created, so the date is guessed from the existing files in backup_path.
0708a374
ERE
317 '''
318 prefix = "bfull" if is_full else "bdiff"
2cdd9faf
PG
319 extension = self.pick_extension \
320 (PDT_TYPE_ARCHIVE,
321 self.__file_extensions_dict [self.mode])
0708a374 322
df86af81 323 if not guess_name:
f7940c31 324 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf 325 return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
df86af81
ERE
326 else:
327 prefix = prefix + "-"
90b75470 328 postfix = "-%03d%s" % (volume_number + 1, extension)
86a6e741
ERE
329 for f in os.listdir(backup_path):
330 if f.startswith(prefix) and f.endswith(postfix):
331 return f
df86af81
ERE
332 raise Exception("volume not found")
333
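# Naming sketch (dates invented): with mode='#gz' the default name functions
# above produce, e.g.,
#
#     index:        bfull-2017-05-01-1200.index.gz
#     first volume: bfull-2017-05-01-1200-001.tar.gz
#     diff volume:  bdiff-2017-05-02-0300-001.tar.gz
#
# and, when a password or crypto_key is set, the same names with a trailing
# ".pdtcrypt" appended by pick_extension().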
0708a374 334
974408b5 335 def filter_path(self, path, source_path="", is_dir=None):
8a8fadda
ERE
336 '''
337 Filters a path, given the source_path, using the filtering properties
338 set in the constructor.
339 The filtering order is:
340 1. included_files (if any)
341 2. excluded_files
342 3. filter_func (which must return whether the file is accepted or not)
343 '''
75059f3c 344
c1af2184 345 if len(source_path) > 0:
75059f3c
CH
346 # ensure that exactly one '/' at end of dir is also removed
347 source_path = source_path.rstrip(os.sep) + os.sep
8a8fadda
ERE
348 path = path[len(source_path):]
349
350 # 1. filter included_files
974408b5 351 match = MATCH
8a8fadda 352 if len(self.included_files) > 0:
974408b5 353 match = NO_MATCH
8a8fadda
ERE
354 for i in self.included_files:
355 # it can be either a regexp or a string
be60ffd0 356 if isinstance(i, str):
8a8fadda
ERE
357 # if the string matches, then continue
358 if i == path:
974408b5 359 match = MATCH
c1af2184 360 break
8a8fadda
ERE
361
362 # if the string ends with / it's a directory, and if the
7b07645e 363 # path is contained in it, it is included
c1af2184 364 if i.endswith('/') and path.startswith(i):
974408b5 365 match = MATCH
c1af2184 366 break
8a8fadda
ERE
367
368 # if the string doesn't end with /, add it and do the same
369 # check
c1af2184 370 elif path.startswith(i + '/'):
974408b5 371 match = MATCH
c1af2184 372 break
8a8fadda 373
974408b5
ERE
374 # check for PARENT_MATCH
375 if is_dir:
376 dir_path = path
377 if not dir_path.endswith('/'):
378 dir_path += '/'
379
380 if i.startswith(dir_path):
381 match = PARENT_MATCH
382
8a8fadda
ERE
383 # if it's a reg exp, then we just check if it matches
384 elif isinstance(i, re._pattern_type):
c1af2184 385 if i.match(path):
974408b5 386 match = MATCH
c1af2184 387 break
8a8fadda 388 else:
4bda6f45 389 self.logger.warning('Invalid pattern in included_files: %s' % str(i))
8a8fadda 390
974408b5
ERE
391 if match == NO_MATCH:
392 return NO_MATCH
c1af2184 393
974408b5
ERE
394 # when a directory is in PARENT_MATCH, it doesn't matter if it's
395 # excluded. Its subfiles will be excluded, but the directory itself
396 # won't be.
397 if match != PARENT_MATCH:
8a8fadda
ERE
398 for e in self.excluded_files:
399 # it can be either a regexp or a string
be60ffd0 400 if isinstance(e, str):
8a8fadda 401 # if the string matches, then exclude
c1af2184 402 if e == path:
974408b5 403 return NO_MATCH
8a8fadda
ERE
404
405 # if the string ends with / it's a directory, and if the
406 # path starts with the directory, then exclude
c1af2184 407 if e.endswith('/') and path.startswith(e):
974408b5 408 return NO_MATCH
8a8fadda
ERE
409
410 # if the string doesn't end with /, do the same check with
411 # the slash added
c1af2184 412 elif path.startswith(e + '/'):
974408b5 413 return NO_MATCH
8a8fadda
ERE
414
415 # if it's a reg exp, then we just check if it matches
c1af2184
ERE
416 elif isinstance(e, re._pattern_type):
417 if e.match(path):
974408b5 418 return NO_MATCH
8a8fadda 419 else:
4bda6f45 420 self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
8a8fadda
ERE
421
422 if self.filter_func:
423 return self.filter_func(path)
424
974408b5 425 return match
8a8fadda 426
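# Filtering sketch (paths invented): with included_files=['data/reports/']
# and no excludes, relative to source_path='/srv':
#
#     filter_path('/srv/data/reports/a.txt', '/srv')    -> MATCH
#     filter_path('/srv/data', '/srv', is_dir=True)     -> PARENT_MATCH
#     filter_path('/srv/other.txt', '/srv')             -> NO_MATCH
#
# PARENT_MATCH keeps ancestors of included entries in the walk without
# matching them outright, so the walk can still descend into them.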
283fbd5e 427 def _recursive_walk_dir(self, source_path, keep_base_dir=False):
0708a374
ERE
428 '''
429 Walk a directory recursively, yielding each file/directory
c059a221
PG
430
431 Returns the path of an entity. If ``keep_base_dir`` is set,
432 the path returned contains the prefix ``source_path``; otherwise it is
433 relative to the prefix.
0708a374
ERE
434 '''
435
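# Access-race strategy: a directory that vanishes before it can be opened is
# skipped, entries that disappear between listing and the access() check are
# skipped with a warning, and anything that vanishes later is caught at
# archiving time by the FileNotFoundError handlers in create_full_backup()
# and create_diff_backup().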
283fbd5e 436 source_path = source_path.rstrip(os.sep)
0708a374 437
283fbd5e 438 if keep_base_dir:
adf7dac4 439 beginning_size = 0
283fbd5e
CH
440 else:
441 beginning_size = len(source_path) + 1 # +1 for os.sep
442
443 queue = [source_path]
444
d07c8065 445 while queue:
df86af81 446 cur_path = queue.pop(0)
0708a374 447
c059a221
PG
448 try: dfd = os.open (cur_path, os.O_DIRECTORY)
449 except FileNotFoundError: # it might have been removed in the meantime
d86735e4
ERE
450 continue
451
c059a221
PG
452 try:
453 for filename in sorted(os.listdir(dfd)):
454 child = os.path.join(cur_path, filename)
455 is_dir = os.path.isdir(child)
456 status = self.filter_path(child, source_path, is_dir)
457 if status == NO_MATCH:
458 continue
459 if not os.access(child, os.R_OK):
460 self.logger.warning('Error accessing possibly locked file %s' % child)
461 continue
462
463 if status == MATCH:
464 yield child[beginning_size:]
465
466 if is_dir and (status == MATCH or status == PARENT_MATCH):
467 queue.append(child)
468 finally:
469 os.close (dfd)
0708a374 470
e82f14f5
ERE
471 def _stat_dict(self, path):
472 '''
473 Returns a dict with the stat data used to compare files
474 '''
475 stinfo = os.stat(path)
476 mode = stinfo.st_mode
477
478 ptype = None
479 if stat.S_ISDIR(mode):
d07c8065 480 ptype = u'directory'
e82f14f5 481 elif stat.S_ISREG(mode):
d07c8065 482 ptype = u'file'
e82f14f5 483 elif stat.S_ISLNK(mode):
d07c8065 484 ptype = u'link'
e82f14f5
ERE
485
486 return {
d07c8065 487 u'type': ptype,
be60ffd0 488 u'path': path,
d07c8065 489 u'mode': mode,
0501fe0a
ERE
490 u'mtime': int(stinfo.st_mtime),
491 u'ctime': int(stinfo.st_ctime),
d07c8065
ERE
492 u'uid': stinfo.st_uid,
493 u'gid': stinfo.st_gid,
494 u'inode': stinfo.st_ino,
495 u'size': stinfo.st_size
e82f14f5
ERE
496 }
497
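# Example of the dict returned for a regular file (values invented):
#
#     {u'type': u'file', u'path': 'etc/passwd', u'mode': 33188,
#      u'mtime': 1493633990, u'ctime': 1493633990, u'uid': 0, u'gid': 0,
#      u'inode': 131077, u'size': 1365}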
df99a044 498 def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
d07c8065
ERE
499 '''
500 Return whether the dicts are equal in the stat keys
501 '''
fc8fdcbc 502 keys = [u'type', u'mode',u'size', u'mtime',
d041935c 503 # not restored: u'inode', u'ctime'
df99a044 504 ]
8adbe50d 505
fc8fdcbc 506 # only if the user is root do we also check gid/uid; otherwise skip the check,
d041935c 507 # because tarfile can only chown when running as superuser
50d70ca9
PG
508 #
509 # also, skip the check in rpmbuild since the sources end up with the
510 # uid:gid of the packager while the extracted files are 0:0.
511 if hasattr(os, "geteuid") and os.geteuid() == 0 \
512 and os.getenv ("RPMBUILD_OPTIONS") is None:
fc8fdcbc
ERE
513 keys.append('gid')
514 keys.append('uid')
515
ea6d3c3e 516 if (not d1 and d2 != None) or (d1 != None and not d2):
8adbe50d
ERE
517 return False
518
cbac9f0b
ERE
519 if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
520 return False
8adbe50d 521
fc8fdcbc
ERE
522 type = d1.get('type', '')
523
d07c8065 524 for key in keys:
fc8fdcbc
ERE
525 # size doesn't matter for directories
526 if type == 'directory' and key == 'size':
527 continue
d07c8065
ERE
528 if d1.get(key, -1) != d2.get(key, -2):
529 return False
530 return True
531
df99a044 532 def prefixed(self, path, listsnapshot_equal=False):
8adbe50d
ERE
533 '''
534 if a path is not prefixed, return it prefixed
535 '''
536 for prefix in self.__path_prefix_list:
537 if path.startswith(prefix):
df99a044
ERE
538 if listsnapshot_equal and prefix == u'list://':
539 return u'snapshot://' + path[len(prefix):]
8adbe50d
ERE
540 return path
541 return u'snapshot://' + path
542
543 def unprefixed(self, path):
544 '''
545 remove a path prefix if any
546 '''
547 for prefix in self.__path_prefix_list:
548 if path.startswith(prefix):
549 return path[len(prefix):]
550 return path
551
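# Prefix handling sketch:
#
#     prefixed('etc/passwd')                  -> 'snapshot://etc/passwd'
#     prefixed('delete://etc/passwd')         -> 'delete://etc/passwd'
#     prefixed('list://etc/passwd', listsnapshot_equal=True)
#                                             -> 'snapshot://etc/passwd'
#     unprefixed('snapshot://etc/passwd')     -> 'etc/passwd'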
133d30da
PG
552
553 def initialize_encryption (self, mode):
554 password = self.password
1f3fd7b0
PG
555 key = self.crypto_key
556 nacl = self.nacl
133d30da 557
1f3fd7b0 558 if key is None and password is None:
133d30da
PG
559 return
560 if mode == CRYPTO_MODE_ENCRYPT:
1f3fd7b0
PG
561 return crypto.Encrypt (password=password,
562 key=key,
563 nacl=nacl,
54f909ca 564 version=self.crypto_version,
774ca538 565 paramversion=self.crypto_paramversion)
133d30da 566 if mode == CRYPTO_MODE_DECRYPT:
1f3fd7b0 567 return crypto.Decrypt (password=password, key=key)
133d30da
PG
568
569 raise Exception ("invalid encryption mode [%r]" % mode)
570
571
9eccb1c2 572 def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
3a7e1a50 573 '''
9eccb1c2
PG
574 Given the specified configuration, opens a file for reading or writing,
575 inheriting the encryption and compression settings from the backup.
576 Returns a file object ready to use.
3fdea6d4 577
c8c72fe1
PG
578 :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
579 respectively).
580 :type mode: str
774ca538
PG
581 :param kind: Role of the file, see AUXILIARY_FILE_* constants.
582 Both the info and the index file have a globally
583 unique, constant counter value.
3fdea6d4 584 :type kind: int
3a7e1a50 585 '''
3a7e1a50
ERE
586 if self.index_mode.startswith('gz'):
587 comptype = 'gz'
588 elif self.index_mode.startswith('bz2'):
589 comptype = 'bz2'
590 else:
591 comptype = 'tar'
592
133d30da 593 crypto_ctx = None
6de9444a 594 enccounter = None
133d30da 595 if mode == "w":
774ca538 596 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 597 elif mode == "r":
774ca538 598 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
133d30da 599
3031b7ae
PG
600 if crypto_ctx is not None:
601 if kind == AUXILIARY_FILE_INFO:
602 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
603 elif kind == AUXILIARY_FILE_INDEX:
604 enccounter = crypto.AES_GCM_IV_CNT_INDEX
605 else:
606 raise Exception ("invalid kind of aux file %r" % kind)
607
c8c72fe1 608 sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
3fdea6d4 609 bufsize=tarfile.RECORDSIZE, fileobj=None,
6de9444a 610 encryption=crypto_ctx, enccounter=enccounter)
c8c72fe1
PG
611
612 return sink
613
3a7e1a50 614
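# Sketch of writing an auxiliary file with the backup's settings (path and
# payload invented); the returned stream expects bytes and is closed like
# the index sink below:
#
#     sink = dtar.open_auxiliary_file('/var/backups/data/backup.info', 'w',
#                                     kind=AUXILIARY_FILE_INFO)
#     sink.write(b'{"type": "info"}\n')
#     sink.close(close_fileobj=True)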
0708a374 615 def create_full_backup(self, source_path, backup_path,
d4a05db6 616 max_volume_size=None, extra_data=dict()):
0708a374
ERE
617 '''
618 Creates a full backup.
619
620 Parameters:
621 - source_path: source path to the directory to back up.
622 - backup_path: path where the back up will be stored. Backup path will
623 be created if not existent.
d5361dac
ERE
624 - max_volume_size: maximum volume size in megabytes. Used to split the
625 backup in volumes. Optional (won't split in volumes by default).
9eae9a1f
ERE
626 - extra_data: a json-serializable dictionary with information that you
627 want to be included in the header of the index file
0708a374
ERE
628 '''
629 # check input
be60ffd0 630 if not isinstance(source_path, str):
0708a374
ERE
631 raise Exception('Source path must be a string')
632
be60ffd0 633 if not isinstance(backup_path, str):
0708a374
ERE
634 raise Exception('Backup path must be a string')
635
636 if not os.path.exists(source_path) or not os.path.isdir(source_path):
637 raise Exception('Source path "%s" does not exist or is not a '\
638 'directory' % source_path)
639
d07c8065
ERE
640 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
641 max_volume_size < 1):
642 raise Exception('max_volume_size must be a positive integer')
d5361dac
ERE
643 if max_volume_size != None:
644 max_volume_size = max_volume_size*1024*1024
645
9eae9a1f
ERE
646 if not isinstance(extra_data, dict):
647 raise Exception('extra_data must be a dictionary')
648
649 try:
650 extra_data_str = json.dumps(extra_data)
651 except:
652 raise Exception('extra_data is not json-serializable')
653
0708a374
ERE
654 if not os.access(source_path, os.R_OK):
655 raise Exception('Source path "%s" is not readable' % source_path)
656
657 # try to create backup path if needed
658 if not os.path.exists(backup_path):
d4a05db6 659 os.makedirs(backup_path)
0708a374
ERE
660
661 if not os.access(backup_path, os.W_OK):
662 raise Exception('Backup path "%s" is not writeable' % backup_path)
663
664 if source_path.endswith('/'):
665 source_path = source_path[:-1]
666
667 if backup_path.endswith('/'):
668 backup_path = backup_path[:-1]
669
670 # update current time
671 self.current_time = datetime.datetime.now()
672
673 if self.mode not in self.__file_extensions_dict:
674 raise Exception('Unrecognized extension')
675
2ae46844 676 # setup for encrypting payload
774ca538
PG
677 if self.encryptor is None:
678 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
2ae46844 679
0708a374 680 # some initialization
11684b1d 681 self.vol_no = 0
0708a374
ERE
682
683 # generate the first volume name
684 vol_name = self.volume_name_func(backup_path, True, 0)
685 tarfile_path = os.path.join(backup_path, vol_name)
686
774ca538
PG
687 # init index
688 index_name = self.index_name_func(True)
689 index_path = os.path.join(backup_path, index_name)
690 index_sink = self.open_auxiliary_file(index_path, 'w')
e82f14f5 691
d5361dac
ERE
692 cwd = os.getcwd()
693
b7c47f38 694 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
0708a374
ERE
695 '''
696 Handles the new volumes
697 '''
d5361dac
ERE
698 volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
699 volume_path = os.path.join(backup_path, volume_name)
11684b1d 700 deltarobj.vol_no = volume_number
d5361dac
ERE
701
702 # we convert relative paths into absolute because CWD is changed
703 if not os.path.isabs(volume_path):
704 volume_path = os.path.join(cwd, volume_path)
11684b1d 705
8e019196
ERE
706 if tarobj.fileobj is not None:
707 tarobj.fileobj.close()
708
b008f989
ERE
709 deltarobj.logger.debug("opening volume %s" % volume_path)
710
b7c47f38 711 tarobj.open_volume(volume_path, encryption=encryption)
d5361dac
ERE
712
713 # wraps some args from context into the handler
133d30da 714 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
0708a374 715
774ca538 716 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
6c678f3a 717
be60ffd0 718 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
6c678f3a 719 # calculate checksum and write into the stream
c2ffe2ec 720 crc = binascii.crc32(s) & 0xFFFFffff
774ca538 721 index_sink.write(s)
e82f14f5 722
0708a374
ERE
723 # start creating the tarfile
724 tarobj = tarfile.TarFile.open(tarfile_path,
da26094a 725 mode='w' + self.mode,
0708a374 726 format=tarfile.GNU_FORMAT,
d1c38f40 727 concat='#' in self.mode,
133d30da 728 encryption=self.encryptor,
0708a374 729 max_volume_size=max_volume_size,
ea625b04 730 new_volume_handler=new_volume_handler,
e2b59b34
ERE
731 save_to_members=False,
732 dereference=True)
e5c6ca04 733 os.chdir(source_path)
55b8686d
ERE
734
735 # for each file to be in the backup, do:
e82f14f5 736 for path in self._recursive_walk_dir('.'):
55b8686d 737 # calculate stat dict for current file
253d4cdd
ERE
738 statd = self._stat_dict(path)
739 statd['path'] = u'snapshot://' + statd['path']
740 statd['volume'] = self.vol_no
55b8686d
ERE
741
742 # backup file
3e9b81bb
PG
743
744 try: # backup file
745 tarobj.add(path, arcname = statd['path'], recursive=False)
746 except FileNotFoundError as exn:
747 # file vanished since the call to access(3) above
748 self.logger.warning ("object [%s] no longer available in "
749 "file system (error: %s); skipping"
750 % (path, str (exn)))
751 continue # prevent indexing
11684b1d 752
55b8686d 753 # retrieve file offset
253d4cdd 754 statd['offset'] = tarobj.get_last_member_offset()
b008f989 755 self.logger.debug("backup %s" % statd['path'])
6c678f3a 756
d041935c 757 # store the stat dict in the index
be60ffd0 758 s = bytes(json.dumps(statd) + '\n', 'UTF-8')
6c678f3a 759 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 760 index_sink.write(s)
e82f14f5 761
be60ffd0 762 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
6c678f3a 763 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 764 index_sink.write(s)
be60ffd0 765 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
774ca538
PG
766 index_sink.write(s)
767
e5c6ca04 768 os.chdir(cwd)
0708a374 769 tarobj.close()
c8c72fe1 770 index_sink.close (close_fileobj=True)
938c2d54 771
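# Shape of the index stream written above, one JSON object per line (stat
# entries abbreviated, checksum invented):
#
#     {"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": {}}
#     {"type": "BEGIN-FILE-LIST"}
#     {"type": "file", "path": "snapshot://etc/passwd", "volume": 0, "offset": 1536, ...}
#     {"type": "END-FILE-LIST"}
#     {"type": "file-list-checksum", "checksum": 1234567890}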
0708a374 772 def create_diff_backup(self, source_path, backup_path, previous_index_path,
d4a05db6 773 max_volume_size=None, extra_data=dict()):
0708a374
ERE
774 '''
775 Creates a differential backup.
776
777 Parameters:
778 - source_path: source path to the directory to back up.
779 - backup_path: path where the back up will be stored. Backup path will
780 be created if not existent.
781 - previous_index_path: index of the previous backup, needed to know
782 which files changed since then.
783 - max_volume_size: maximum volume size in megabytes (MB). Used to split
784 the backup in volumes. Optional (won't split in volumes by default).
3a7e1a50
ERE
785
786 NOTE: previous index is assumed to follow exactly the same format as
787 the index_mode setup in the constructor.
0708a374 788 '''
d07c8065 789 # check/sanitize input
be60ffd0 790 if not isinstance(source_path, str):
d07c8065
ERE
791 raise Exception('Source path must be a string')
792
be60ffd0 793 if not isinstance(backup_path, str):
d07c8065
ERE
794 raise Exception('Backup path must be a string')
795
796 if not os.path.exists(source_path) or not os.path.isdir(source_path):
797 raise Exception('Source path "%s" does not exist or is not a '\
798 'directory' % source_path)
799
9eae9a1f
ERE
800 if not isinstance(extra_data, dict):
801 raise Exception('extra_data must be a dictionary')
802
803 try:
804 extra_data_str = json.dumps(extra_data)
805 except:
806 raise Exception('extra_data is not json-serializable')
807
d07c8065
ERE
808 if not os.access(source_path, os.R_OK):
809 raise Exception('Source path "%s" is not readable' % source_path)
810
811 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
812 max_volume_size < 1):
813 raise Exception('max_volume_size must be a positive integer')
814 if max_volume_size != None:
815 max_volume_size = max_volume_size*1024*1024
816
be60ffd0 817 if not isinstance(previous_index_path, str):
d07c8065
ERE
818 raise Exception('previous_index_path must be a string')
819
820 if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
821 raise Exception('Index path "%s" does not exist or is not a '\
822 'file' % previous_index_path)
823
824 if not os.access(previous_index_path, os.R_OK):
825 raise Exception('Index path "%s" is not readable' % previous_index_path)
826
827 # try to create backup path if needed
828 if not os.path.exists(backup_path):
d4a05db6 829 os.makedirs(backup_path)
d07c8065
ERE
830
831 if not os.access(backup_path, os.W_OK):
832 raise Exception('Backup path "%s" is not writeable' % backup_path)
833
834 if source_path.endswith('/'):
835 source_path = source_path[:-1]
836
837 if backup_path.endswith('/'):
838 backup_path = backup_path[:-1]
839
840 # update current time
841 self.current_time = datetime.datetime.now()
842
843 if self.mode not in self.__file_extensions_dict:
844 raise Exception('Unrecognized extension')
845
2ae46844 846 # setup for encrypting payload
774ca538
PG
847 if self.encryptor is None:
848 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 849
d07c8065
ERE
850 # some initialization
851 self.vol_no = 0
852
853 # generate the first volume name
df86af81
ERE
854 vol_name = self.volume_name_func(backup_path, is_full=False,
855 volume_number=0)
d07c8065
ERE
856 tarfile_path = os.path.join(backup_path, vol_name)
857
938c2d54 858 # init index
d07c8065
ERE
859 cwd = os.getcwd()
860
3031b7ae
PG
861 index_name = self.index_name_func(is_full=False)
862 index_path = os.path.join(backup_path, index_name)
863 index_sink = self.open_auxiliary_file(index_path, 'w')
864
d07c8065
ERE
865 def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
866 '''
867 Handles the new volumes
868 '''
df86af81
ERE
869 volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
870 volume_number=volume_number)
d07c8065
ERE
871 volume_path = os.path.join(backup_path, volume_name)
872 deltarobj.vol_no = volume_number
873
874 # we convert relative paths into absolute because CWD is changed
875 if not os.path.isabs(volume_path):
876 volume_path = os.path.join(cwd, volume_path)
877
f624ff3d 878 deltarobj.logger.debug("opening volume %s" % volume_path)
d07c8065
ERE
879 tarobj.open_volume(volume_path)
880
881 # wraps some args from context into the handler
882 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
883
3031b7ae 884 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
d07c8065 885
be60ffd0 886 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
d07c8065 887 # calculate checksum and write into the stream
c2ffe2ec 888 crc = binascii.crc32(s) & 0xFFFFffff
3031b7ae 889 index_sink.write(s)
d07c8065
ERE
890
891 # start creating the tarfile
892 tarobj = tarfile.TarFile.open(tarfile_path,
893 mode='w' + self.mode,
894 format=tarfile.GNU_FORMAT,
d1c38f40 895 concat='#' in self.mode,
133d30da 896 encryption=self.encryptor,
d07c8065 897 max_volume_size=max_volume_size,
ea625b04 898 new_volume_handler=new_volume_handler,
e2b59b34
ERE
899 save_to_members=False,
900 dereference=True)
d07c8065 901
aae127d0
ERE
902
903 # create the iterators, first the previous index iterator, then the
904 # source path directory iterator and collate and iterate them
905 if not os.path.isabs(previous_index_path):
906 previous_index_path = os.path.join(cwd, previous_index_path)
907 index_it = self.iterate_index_path(previous_index_path)
908
d07c8065 909 os.chdir(source_path)
aae127d0
ERE
910 dir_it = self._recursive_walk_dir('.')
911 dir_path_it = self.jsonize_path_iterator(dir_it)
d07c8065 912
df86af81
ERE
913 def pr(path):
914 if not path:
915 return "None"
916 else:
917 return path["path"]
8edb2e3c 918
d07c8065 919 # for each file to be in the backup, do:
df86af81 920 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
aae127d0
ERE
921 action = None
922 # if file is not in the index, it means it's a new file, so we have
923 # to take a snapshot
df86af81 924
aae127d0
ERE
925 if not ipath:
926 action = 'snapshot'
927 # if the file is not in the directory iterator, it means that it has
d041935c 928 # been deleted, so we need to mark it as such
aae127d0
ERE
929 elif not dpath:
930 action = 'delete'
931 # if the file is in both iterators, it means it might have either
932 # not changed (in which case we will just list it in our index but
933 # it will not be included in the tar file), or it might have
e8d95fe5 934 # changed, in which case we will snapshot it.
aae127d0
ERE
935 elif ipath and dpath:
936 if self._equal_stat_dicts(ipath, dpath):
937 action = 'list'
938 else:
939 action = 'snapshot'
940 # TODO: when creating chained backups (i.e. diffing from another
941 # diff), we will need to detect the type of action in the previous
942 # index, because if it was delete and dpath is None, we should
943 # discard the file
944
945 if action == 'snapshot':
946 # calculate stat dict for current file
947 stat = dpath.copy()
be60ffd0 948 stat['path'] = "snapshot://" + dpath['path']
aae127d0
ERE
949 stat['volume'] = self.vol_no
950
50f43227
ERE
951 self.logger.debug("[STORE] %s" % dpath['path'])
952
3e9b81bb
PG
953 try: # backup file
954 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
955 # retrieve file offset
956 stat['offset'] = tarobj.get_last_member_offset()
957 except FileNotFoundError as exn:
958 # file vanished since the call to access(3) above
959 self.logger.warning ("object [%s] no longer available in "
960 "file system (error: %s); skipping"
961 % (dpath ["path"], str (exn)))
962 stat = None # prevent indexing
aae127d0 963
aae127d0 964 elif action == 'delete':
50f43227 965 path = self.unprefixed(ipath['path'])
aae127d0 966 stat = {
50f43227 967 u'path': u'delete://' + path,
aae127d0
ERE
968 u'type': ipath['type']
969 }
50f43227 970 self.logger.debug("[DELETE] %s" % path)
aae127d0
ERE
971
972 # mark it as deleted in the backup
42d39ca7 973 tarobj.add("/dev/null", arcname=stat['path'])
aae127d0
ERE
974 elif action == 'list':
975 stat = dpath.copy()
50f43227
ERE
976 path = self.unprefixed(ipath['path'])
977 stat['path'] = u'list://' + path
aae127d0 978 # unchanged files do not enter in the backup, only in the index
50f43227 979 self.logger.debug("[UNCHANGED] %s" % path)
80910564
TJ
980 else:
981 # should not happen
4bda6f45 982 self.logger.warning('unknown action in create_diff_backup: {0}'
80910564
TJ
983 ''.format(action))
984 stat = None
aae127d0 985
80910564
TJ
986 if stat:
987 # store the stat dict in the index
be60ffd0 988 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
aae127d0 989 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 990 index_sink.write(s)
aae127d0 991
be60ffd0 992 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
aae127d0 993 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 994 index_sink.write(s)
be60ffd0 995 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
3031b7ae 996 index_sink.write(s)
938c2d54 997
df86af81 998 index_it.release()
aae127d0
ERE
999 os.chdir(cwd)
1000 tarobj.close()
938c2d54
PG
1001 index_sink.close()
1002
1003
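# Sketch of a follow-up differential backup (paths invented); the previous
# index tells the engine which files changed since the full backup:
#
#     dtar.create_diff_backup(source_path='/srv/data',
#                             backup_path='/var/backups/data-diff',
#                             previous_index_path='/var/backups/data/bfull-2017-05-01-1200.index.gz',
#                             max_volume_size=100)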
d07c8065 1004 def iterate_index_path(self, index_path):
df86af81
ERE
1005 '''
1006 Returns an index iterator. Internally, it uses a classic iterator class.
1007 We do that instead of just yielding so that the iterator object can have
1008 an additional function to close the file descriptor that is opened in
1009 the constructor.
1010 '''
d07c8065 1011
df86af81
ERE
1012 class IndexPathIterator(object):
1013 def __init__(self, delta_tar, index_path):
1014 self.delta_tar = delta_tar
1015 self.index_path = index_path
1016 self.f = None
9eae9a1f 1017 self.extra_data = dict()
df86af81 1018 self.__enter__()
d07c8065 1019
df86af81
ERE
1020 def __iter__(self):
1021 return self
d07c8065 1022
df86af81
ERE
1023 def release(self):
1024 if self.f:
1025 self.f.close()
1026
1027 def __enter__(self):
1028 '''
1029 Allows this iterator to be used with the "with" statement
1030 '''
1031 if self.f is None:
9eccb1c2 1032 self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
df86af81
ERE
1033 # check index header
1034 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1035 if j.get("type", '') != 'python-delta-tar-index' or\
1036 j.get('version', -1) != 1:
1037 raise Exception("invalid index file format: %s" % json.dumps(j))
1038
9eae9a1f
ERE
1039 self.extra_data = j.get('extra_data', dict())
1040
df86af81
ERE
1041 # find BEGIN-FILE-LIST, ignore other headers
1042 while True:
1043 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1044 if j.get('type', '') == 'BEGIN-FILE-LIST':
1045 break
1046 return self
1047
1048 def __exit__(self, type, value, tb):
1049 '''
1050 Allows this iterator to be used with the "with" statement
1051 '''
ec57ce53
ERE
1052 if self.f:
1053 self.f.close()
df86af81 1054 self.f = None
d07c8065 1055
be60ffd0 1056 def __next__(self):
0349168a 1057 # read each file in the index and process it to do the restore
df86af81
ERE
1058 j = {}
1059 l_no = -1
1060 try:
1061 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
be60ffd0 1062 except Exception as e:
df86af81
ERE
1063 if self.f:
1064 self.f.close()
1065 raise e
d07c8065 1066
df86af81 1067 op_type = j.get('type', '')
d07c8065 1068
df86af81
ERE
1069 # when we detect the end of the list, break the loop
1070 if op_type == 'END-FILE-LIST':
1071 if self.f:
1072 self.f.close()
1073 raise StopIteration
1074
1075 # check input
1076 if op_type not in ['directory', 'file', 'link']:
4bda6f45 1077 self.delta_tar.logger.warning('unrecognized type to be '
df86af81
ERE
1078 'restored: %s, line %d' % (op_type, l_no))
1079 # iterate again
be60ffd0 1080 return self.__next__()
df86af81
ERE
1081
1082 return j, l_no
d07c8065 1083
df86af81 1084 return IndexPathIterator(self, index_path)
d07c8065 1085
26fdd428 1086 def iterate_tar_path(self, tar_path, new_volume_handler=None):
24ddf0a2
ERE
1087 '''
1088 Returns a tar iterator that iterates jsonized member items that contain
1089 an additional "member" field, used by RestoreHelper.
1090 '''
ec57ce53 1091 class TarPathIterator(object):
83a81852 1092 def __init__(self, delta_tar, tar_path, new_volume_handler=None):
24ddf0a2 1093 self.delta_tar = delta_tar
ec57ce53 1094 self.tar_path = tar_path
24ddf0a2 1095 self.tar_obj = None
6bca471c 1096 self.last_member = None
26fdd428 1097 self.new_volume_handler = new_volume_handler
24ddf0a2
ERE
1098 self.__enter__()
1099
1100 def __iter__(self):
1101 return self
1102
1103 def release(self):
1104 if self.tar_obj:
1105 self.tar_obj.close()
1106
1107 def __enter__(self):
1108 '''
1109 Allows this iterator to be used with the "with" statement
1110 '''
1111 if self.tar_obj is None:
d5e1d60f
PG
1112 decryptor = None
1113 if self.delta_tar.password is not None:
1f3fd7b0
PG
1114 decryptor = crypto.Decrypt \
1115 (password=self.delta_tar.password,
1116 key=self.delta_tar.crypto_key)
ec57ce53
ERE
1117 self.tar_obj = tarfile.TarFile.open(self.tar_path,
1118 mode='r' + self.delta_tar.mode,
1119 format=tarfile.GNU_FORMAT,
d1c38f40 1120 concat='#' in self.delta_tar.mode,
d5e1d60f 1121 encryption=decryptor,
83a81852 1122 new_volume_handler=self.new_volume_handler,
e2b59b34
ERE
1123 save_to_members=False,
1124 dereference=True)
24ddf0a2
ERE
1125 return self
1126
1127 def __exit__(self, type, value, tb):
1128 '''
1129 Allows this iterator to be used with the "with" statement
1130 '''
ec57ce53
ERE
1131 if self.tar_obj:
1132 self.tar_obj.close()
24ddf0a2
ERE
1133 self.tar_obj = None
1134
be60ffd0 1135 def __next__(self):
24ddf0a2
ERE
1136 '''
1137 Read each member and return it as a stat dict
1138 '''
be60ffd0 1139 tarinfo = self.tar_obj.__iter__().__next__()
8e019196
ERE
1140 # NOTE: here we compare if tarinfo.path is the same as before
1141 # instead of comparing the tarinfo object itself because the
1142 # object itself might change for multivol tarinfos
1143 if tarinfo is None or (self.last_member is not None and\
1144 self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
ec57ce53
ERE
1145 raise StopIteration
1146
6bca471c
ERE
1147 self.last_member = tarinfo
1148
24ddf0a2
ERE
1149 ptype = 'unknown'
1150 if tarinfo.isfile():
1151 ptype = 'file'
1152 elif tarinfo.isdir():
ab7e7465 1153 ptype = 'directory'
24ddf0a2
ERE
1154 elif tarinfo.islnk() or tarinfo.issym():
1155 ptype = 'link'
1156
1157 return {
1158 u'type': ptype,
1159 u'path': tarinfo.path,
1160 u'mode': tarinfo.mode,
1161 u'mtime': tarinfo.mtime,
1162 u'ctime': -1, # cannot restore
1163 u'uid': tarinfo.uid,
1164 u'gid': tarinfo.gid,
1165 u'inode': -1, # cannot restore
1166 u'size': tarinfo.size,
1167 u'member': tarinfo
ec57ce53
ERE
1168 }, 0
1169
26fdd428 1170 return TarPathIterator(self, tar_path, new_volume_handler)
24ddf0a2 1171
df99a044 1172 def jsonize_path_iterator(self, iter, strip=0):
d07c8065
ERE
1173 '''
1174 converts the yielded items of an iterator into json path lines.
df99a044
ERE
1175
1176 strip: Strip the smallest prefix containing num leading slashes from
1177 the file path.
d07c8065
ERE
1178 '''
1179 while True:
1180 try:
be60ffd0 1181 path = iter.__next__()
df99a044 1182 if strip == 0:
4ac6d333 1183 yield self._stat_dict(path), 0
df99a044
ERE
1184 else:
1185 st = self._stat_dict(path)
1186 st['path'] = "/".join(path.split("/")[strip:])
4ac6d333 1187 yield st, 0
d07c8065
ERE
1188 except StopIteration:
1189 break
1190
b84beea7
PG
1191 def iterate_disaster_index (self, index):
1192 """
1193 Mimic the behavior of the other object iterators, just with the inputs
1194 supplied directly as *index*.
1195 """
1196
1197 class RawIndexIterator(object):
65b35c42 1198 def __init__(self, delta_tar, index):
b84beea7
PG
1199 self.delta_tar = delta_tar
1200 self.index = index
1201 self.__enter__()
1202
1203 def __iter__(self):
1204 return self
1205
1206 def release(self):
65b35c42 1207 pass
b84beea7
PG
1208
1209 def __enter__(self):
1210 '''
1211 Allows this iterator to be used with the "with" statement
1212 '''
1213 self.iter = self.index.__iter__ ()
1214 return self
1215
1216 def __exit__(self, type, value, tb):
1217 '''
1218 Allows this iterator to be used with the "with" statement
1219 '''
1220
1221 def __next__(self):
1222 idxent = self.iter.__next__ ()
65b35c42 1223 return idxent, 0
b84beea7
PG
1224
1225 return RawIndexIterator(self, index)
1226
d07c8065
ERE
1227 def collate_iterators(self, it1, it2):
1228 '''
1229 Collate two iterators, so that it returns pairs of the items of each
1230 iterator (if the items are the same), or (None, elem2) or (elem1, None)
1231 when there's no match for the items in the other iterator.
1232
1233 It assumes that the items in both lists are ordered in the same way.
1234 '''
ea6d3c3e 1235 l_no = 0
d07c8065
ERE
1236 elem1, elem2 = None, None
1237 while True:
1238 if not elem1:
1239 try:
be60ffd0 1240 elem1, l_no = it1.__next__()
d07c8065
ERE
1241 except StopIteration:
1242 if elem2:
ea6d3c3e 1243 yield (None, elem2, l_no)
d07c8065 1244 for elem2 in it2:
ea6d3c3e
ERE
1245 if isinstance(elem2, tuple):
1246 elem2 = elem2[0]
1247 yield (None, elem2, l_no)
d07c8065 1248 break
d07c8065
ERE
1249 if not elem2:
1250 try:
be60ffd0 1251 elem2 = it2.__next__()
d07c8065
ERE
1252 if isinstance(elem2, tuple):
1253 elem2 = elem2[0]
1254 except StopIteration:
1255 if elem1:
ea6d3c3e 1256 yield (elem1, None, l_no)
df99a044 1257 for elem1, l_no in it1:
ea6d3c3e 1258 yield (elem1, None, l_no)
d07c8065 1259 break
670f9934
ERE
1260
1261 index1 = self.unprefixed(elem1['path'])
1262 index2 = self.unprefixed(elem2['path'])
1263 i1, i2 = self.compare_indexes(index1, index2)
1264
1265 yield1 = yield2 = None
1266 if i1 is not None:
1267 yield1 = elem1
1268 elem1 = None
1269 if i2 is not None:
1270 yield2 = elem2
1271 elem2 = None
1272 yield (yield1, yield2, l_no)
1273
1274 def compare_indexes(self, index1, index2):
1275 '''
1276 Compare iterator indexes and return a tuple in the following form:
1277 if index1 < index2, returns (index1, None)
1278 if index1 == index2 returns (index1, index2)
1279 else: returns (None, index2)
1280 '''
1281 l1 = index1.split('/')
1282 l2 = index2.split('/')
1283 length = len(l2) - len(l1)
1284
1285 if length > 0:
1286 return (index1, None)
1287 elif length < 0:
1288 return (None, index2)
1289
1290 for i1, i2 in zip(l1, l2):
1291 if i1 < i2:
1292 return (index1, None)
1293 elif i1 > i2:
1294 return (None, index2)
1295
1296 return (index1, index2)
0708a374 1297
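# Ordering sketch: compare_indexes() sorts paths with fewer components first
# and otherwise compares component-wise, which is what collate_iterators()
# relies on to pair index entries with walked directory entries:
#
#     compare_indexes('a', 'a/b')     -> ('a', None)
#     compare_indexes('a/b', 'a/c')   -> ('a/b', None)
#     compare_indexes('a/b', 'a/b')   -> ('a/b', 'a/b')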
8c65a2b1 1298 def list_backup(self, backup_tar_path, list_func=None):
be60ffd0 1299 if not isinstance(backup_tar_path, str):
8c65a2b1
ERE
1300 raise Exception('Backup tar path must be a string')
1301
1302 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1303 raise Exception('Source path "%s" does not exist or is not a '\
1304 'file' % backup_tar_path)
1305
1306 if not os.access(backup_tar_path, os.R_OK):
1307 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1308
1309 cwd = os.getcwd()
1310
b7c47f38 1311 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
8c65a2b1
ERE
1312 '''
1313 Handles the new volumes
1314 '''
1315 volume_name = deltarobj.volume_name_func(backup_path, True,
1316 volume_number, guess_name=True)
1317 volume_path = os.path.join(backup_path, volume_name)
1318
1319 # we convert relative paths into absolute because CWD is changed
1320 if not os.path.isabs(volume_path):
1321 volume_path = os.path.join(cwd, volume_path)
b7c47f38
PG
1322 tarobj.open_volume(volume_path, encryption=encryption)
1323
774ca538
PG
1324 if self.decryptor is None:
1325 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
8c65a2b1
ERE
1326
1327 backup_path = os.path.dirname(backup_tar_path)
1328 if not os.path.isabs(backup_path):
1329 backup_path = os.path.join(cwd, backup_path)
133d30da 1330 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)
b7a6566b 1331
8c65a2b1
ERE
1332 tarobj = tarfile.TarFile.open(backup_tar_path,
1333 mode='r' + self.mode,
1334 format=tarfile.GNU_FORMAT,
d1c38f40 1335 concat='#' in self.mode,
133d30da 1336 encryption=self.decryptor,
ea625b04 1337 new_volume_handler=new_volume_handler,
e2b59b34
ERE
1338 save_to_members=False,
1339 dereference=True)
8c65a2b1
ERE
1340
1341 def filter(cls, list_func, tarinfo):
1342 if list_func is None:
b008f989 1343 self.logger.info(tarinfo.path)
8c65a2b1
ERE
1344 else:
1345 list_func(tarinfo)
1346 return False
1347 filter = partial(filter, self, list_func)
1348
1349 tarobj.extractall(filter=filter)
1350 tarobj.close()
1351
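# Listing sketch (volume path invented): print every member, or collect them
# with a custom callback instead of logging:
#
#     dtar.list_backup('/var/backups/data/bfull-2017-05-01-1200-001.tar.gz',
#                      list_func=lambda tarinfo: print(tarinfo.path))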
0708a374 1352 def restore_backup(self, target_path, backup_indexes_paths=[],
e93f83f1 1353 backup_tar_path=None, restore_callback=None,
b84beea7 1354 disaster=tarfile.TOLERANCE_STRICT, backup_index=None):
0708a374
ERE
1355 '''
1356 Restores a backup.
1357
1358 Parameters:
0708a374
ERE
1359 - target_path: path to restore.
1360 - backup_indexes_paths: path to backup indexes, in descending date order.
1361 The indexes indicate the location of their respective backup volumes,
1362 and multiple indexes are needed to be able to restore diff backups.
1363 Note that this is an optional parameter: if not supplied, it will
1364 try to restore directly from backup_tar_path.
1365 - backup_tar_path: path to the backup tar file. Used as an alternative
1366 to backup_indexes_paths to restore directly from a tar file without
1367 using any file index. If it's a multivol tarfile, volume_name_func
1368 will be called.
4da27cfe 1369 - restore_callback: callback function to be called during restore.
b0aef801 1370 This is passed to the helper and gets called for every file.
11684b1d 1371
3a7e1a50 1372 NOTE: If you want to use an index to restore a backup, this function
11684b1d
ERE
1373 only supports doing so when the tarfile mode is either uncompressed or
1374 uses concat compression mode, because otherwise it would be very slow.
3a7e1a50
ERE
1375
1376 NOTE: Indices are assumed to follow the same format as the index_mode
1377 specified in the constructor.
e93f83f1
PG
1378
1379 Returns the list of files that could not be restored, if there were
1380 any.
0708a374 1381 '''
11684b1d 1382 # check/sanitize input
be60ffd0 1383 if not isinstance(target_path, str):
e5c6ca04
ERE
1384 raise Exception('Target path must be a string')
1385
11684b1d
ERE
1386 if backup_indexes_paths == [] and backup_tar_path is None:
1387 raise Exception("You have to either provide index paths or a tar path")
e5c6ca04 1388
b84beea7
PG
1389 if isinstance (backup_index, list) is True:
1390 mode = "disaster"
1391 elif len(backup_indexes_paths) == 0:
ea6d3c3e
ERE
1392 mode = "tar"
1393 else:
1394 mode = "diff"
1395
1396 if mode == "tar":
be60ffd0 1397 if not isinstance(backup_tar_path, str):
11684b1d
ERE
1398 raise Exception('Backup tar path must be a string')
1399
1400 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1401 raise Exception('Source path "%s" does not exist or is not a '\
1402 'file' % backup_tar_path)
1403
1404 if not os.access(backup_tar_path, os.R_OK):
1405 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1406 else:
1407 if not isinstance(backup_indexes_paths, list):
1408 raise Exception('backup_indexes_paths must be a list')
1409
1410 if self.mode.startswith(':') or self.mode.startswith('|'):
1411 raise Exception('Restore only supports either uncompressed tars'
1412 ' or concat compression when restoring from an index, and '
1413 ' the open mode you provided is "%s"' % self.mode)
1414
1415 for index in backup_indexes_paths:
be60ffd0 1416 if not isinstance(index, str):
11684b1d 1417 raise Exception('indices must be strings')
e5c6ca04 1418
11684b1d
ERE
1419 if not os.path.exists(index) or not os.path.isfile(index):
1420 raise Exception('Index path "%s" does not exist or is not a '\
1421 'file' % index)
1422
1423 if not os.access(index, os.R_OK):
1424 raise Exception('Index path "%s" is not readable' % index)
e5c6ca04
ERE
1425
1426 # try to create backup path if needed
1427 if not os.path.exists(target_path):
1428 os.makedirs(target_path)
1429
ec57ce53
ERE
1430 # make backup_tar_path absolute so that iterate_tar_path works fine
1431 if backup_tar_path and not os.path.isabs(backup_tar_path):
1432 backup_tar_path = os.path.abspath(backup_tar_path)
1433
d5361dac 1434 cwd = os.getcwd()
ec57ce53 1435 os.chdir(target_path)
d5361dac 1436
2ae46844 1437 # setup for decrypting payload
774ca538
PG
1438 if self.decryptor is None:
1439 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
2ae46844 1440
ea6d3c3e 1441 if mode == 'tar':
24ddf0a2
ERE
1442 index_it = self.iterate_tar_path(backup_tar_path)
1443 helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
ec57ce53 1444 tarobj=index_it.tar_obj)
ea6d3c3e 1445 elif mode == "diff":
04f4c7ab
PG
1446 helper = RestoreHelper(self, cwd, backup_indexes_paths,
1447 disaster=disaster)
f3d10816
PG
1448 try:
1449 # get iterator from newest index at _data[0]
1450 index1 = helper._data[0]["path"]
1451 index_it = self.iterate_index_path(index1)
1452 except tarfile.DecryptionError as exn:
1453 self.logger.error("failed to decrypt file [%s]: %s; is this an "
afc87ebc
PG
1454 "actual encrypted index file?"
1455 % (index1, str (exn)))
1456 return [(index1, exn)]
1457 except Exception as exn:
1458 # compressed files
1459 self.logger.error("failed to read file [%s]: %s; is this an "
1460 "actual index file?" % (index1, str (exn)))
f3d10816 1461 return [(index1, exn)]
b84beea7
PG
1462 elif mode == "disaster":
1463 index_it = self.iterate_disaster_index (backup_index)
65b35c42
PG
1464 helper = RestoreHelper (self, cwd, backup_path=backup_tar_path,
1465 backup_index=backup_index,
1466 disaster=disaster)
b84beea7 1467
d07c8065 1468
24ddf0a2
ERE
1469 dir_it = self._recursive_walk_dir('.')
1470 dir_path_it = self.jsonize_path_iterator(dir_it)
11684b1d 1471
e93f83f1
PG
1472 failed = [] # irrecoverable files
1473
a395759e 1474 # for each file to be restored, do:
24ddf0a2
ERE
1475 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
1476 if not ipath:
1477 upath = dpath['path']
1478 op_type = dpath['type']
1479 else:
1480 upath = self.unprefixed(ipath['path'])
1481 op_type = ipath['type']
42c04ead 1482
24ddf0a2 1483 # filter paths
75059f3c 1484 if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
24ddf0a2 1485 continue
ea6d3c3e 1486
24ddf0a2
ERE
1487 # if types of the file mismatch, the file needs to be deleted
1488 # and re-restored
1489 if ipath is not None and dpath is not None and\
1490 dpath['type'] != ipath['type']:
1491 helper.delete(upath)
1492
1493 # if file not found in dpath, we can directly restore from index
1494 if not dpath:
1495 # if the file doesn't exist and it needs to be deleted, it
1496 # means that work is already done
1497 if ipath['path'].startswith('delete://'):
ea6d3c3e 1498 continue
24ddf0a2 1499 try:
b008f989 1500 self.logger.debug("restore %s" % ipath['path'])
4da27cfe 1501 helper.restore(ipath, l_no, restore_callback)
be60ffd0 1502 except Exception as e:
e93f83f1 1503 iipath = ipath.get ("path", "")
7b07645e 1504 self.logger.error("FAILED to restore: {} ({})"
e93f83f1 1505 .format(iipath, e))
04f4c7ab 1506 if disaster != tarfile.TOLERANCE_STRICT:
e93f83f1 1507 failed.append ((iipath, e))
24ddf0a2 1508 continue
11684b1d 1509
24ddf0a2
ERE
1510 # if both files are equal, we have nothing to restore
1511 if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
1512 continue
1513
1514 # we have to restore the file, but first we need to delete the
1515 # current existing file.
1516 # we don't delete the file if it's a directory, because it might
1517 # just have changed mtime, so it's quite inefficient to remove
1518 # it
1519 if ipath:
1520 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
42c04ead 1521 helper.delete(upath)
b008f989 1522 self.logger.debug("restore %s" % ipath['path'])
e93f83f1
PG
1523 try:
1524 helper.restore(ipath, l_no, restore_callback)
1525 except Exception as e:
04f4c7ab 1526 if disaster == tarfile.TOLERANCE_STRICT:
e93f83f1
PG
1527 raise
1528 failed.append ((ipath.get ("path", ""), e))
1529 continue
24ddf0a2
ERE
1530
1531 # if the file is not in the index (so it comes from the target
1532 # directory) then we have to delete it
1533 else:
c9d47a03 1534 self.logger.debug("delete %s" % upath)
24ddf0a2 1535 helper.delete(upath)
42c04ead 1536
ec57ce53
ERE
1537 helper.restore_directories_permissions()
1538 index_it.release()
1539 os.chdir(cwd)
1540 helper.cleanup()
ea6d3c3e 1541
e93f83f1
PG
1542 return failed
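    # Condensed sketch of the decision logic in the collation loop above
    # (illustrative only; it omits filtering, delete:// handling and error
    # tolerance, and mirrors the local names used in restore_backup):
    #
    #     for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
    #         upath = self.unprefixed(ipath['path']) if ipath else dpath['path']
    #         if ipath and not dpath:                      # only in the index
    #             helper.restore(ipath, l_no, restore_callback)
    #         elif ipath and dpath:                        # present in both
    #             if not self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
    #                 helper.delete(upath)
    #                 helper.restore(ipath, l_no, restore_callback)
    #         else:                                        # only on disk
    #             helper.delete(upath)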
1543
1544
1545 def recover_backup(self, target_path, backup_indexes_paths=[],
1546 restore_callback=None):
1547 """
1548 Walk the index, extracting objects in disaster mode. Bad files are
1549 reported along with a reason.
1550 """
1551 return self.restore_backup(target_path,
1552 backup_indexes_paths=backup_indexes_paths,
04f4c7ab
PG
1553 disaster=tarfile.TOLERANCE_RECOVER)
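    # Usage sketch (assumption: ``dtar`` is a DeltaTar instance configured with
    # the same mode/password/key that produced the backup; the index path below
    # is hypothetical):
    #
    #     failed = dtar.recover_backup("/srv/restore",
    #                                  backup_indexes_paths=["/backups/index.gz"])
    #     for path, exn in failed:
    #         dtar.logger.warning("unrecoverable: %s (%s)" % (path, exn))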
1554
1555
6690f5e0 1556 def rescue_backup(self, target_path, backup_tar_path,
04f4c7ab
PG
1557 restore_callback=None):
1558 """
1559 More aggressive “unfsck” mode: do not rely on the index data as the
1560 files may be corrupt; skim files for header-like information and
1561 attempt to retrieve the data.
1562 """
27ee4dd4
PG
1563 def gen_volume_name (nvol):
1564 return os.path.join (os.path.dirname (backup_tar_path),
1565 self.volume_name_func (backup_tar_path,
1566 True,
1567 nvol))
1568
1569 backup_index = tarfile.gen_rescue_index (gen_volume_name,
1570 self.mode,
1571 password=self.password,
1572 key=self.crypto_key)
6690f5e0 1573
04f4c7ab 1574 return self.restore_backup(target_path,
b84beea7 1575 backup_index=backup_index,
65b35c42 1576 backup_tar_path=backup_tar_path,
04f4c7ab 1577 disaster=tarfile.TOLERANCE_RESCUE)
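    # Usage sketch (same assumptions as for recover_backup above; the volume
    # path is hypothetical). Unlike recover_backup, rescue_backup rebuilds an
    # index by scanning the volumes themselves:
    #
    #     failed = dtar.rescue_backup("/srv/restore",
    #                                 "/backups/backup-full-2014-01-01/volume-1.tar")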
e93f83f1
PG
1578
1579
11684b1d
ERE
1580 def _parse_json_line(self, f, l_no):
1581 '''
ee0e095f 1582         Read a line from a file-like object and parse it as JSON.
11684b1d
ERE
1583 '''
1584 l = f.readline()
1585 l_no += 1
1586 try:
be60ffd0 1587 j = json.loads(l.decode('UTF-8'))
ee0e095f
PG
1588 except UnicodeDecodeError as e:
1589 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1590 raise Exception \
1591 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1592 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1593 from e
1594 raise Exception \
1595 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1596 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1597 from e
be60ffd0 1598 except ValueError as e:
11684b1d
ERE
1599             raise Exception("error parsing this json line "
1600                 "(line number %d): %s" % (l_no, l)) from e
1601 return j, l_no
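    # Illustration (hedged: the field set is reduced to the keys used elsewhere
    # in this module; real index lines carry more metadata):
    #
    #     import io, json
    #     line = json.dumps({"type": "file", "path": "snapshot://./etc/fstab",
    #                        "volume": 0, "offset": 1536}).encode("UTF-8")
    #     j, l_no = self._parse_json_line(io.BytesIO(line + b"\n"), 0)
    #     assert j["path"].startswith("snapshot://") and l_no == 1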
ea6d3c3e 1602
24ddf0a2 1603
ea6d3c3e
ERE
1604class RestoreHelper(object):
1605 '''
1606 Class used to help to restore files from indices
1607 '''
1608
1609 # holds the dicts of data
1610 _data = []
1611
1612 _deltatar = None
1613
1614 _cwd = None
1615
0501fe0a
ERE
1616 # list of directories to be restored. This is done as a last step, see
1617 # tarfile.extractall for details.
1618 _directories = []
1619
04f4c7ab 1620 _disaster = tarfile.TOLERANCE_STRICT
e93f83f1 1621
037994ca 1622 def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
65b35c42
PG
1623 backup_index=None, tarobj=None,
1624 disaster=tarfile.TOLERANCE_STRICT):
ea6d3c3e
ERE
1625 '''
1626         Constructor opens the tars and initializes the data structures.
1627
037994ca
PG
1628 Assumptions:
1629
1630 - Index list must be provided in reverse order (newer first).
1631 - “newer first” apparently means that if there are n backups
1632 provided, the last full backup is at index n-1 and the most recent
1633 diff backup is at index 0.
1634 - Only the first, the second, and the last elements of
1635 ``index_list`` are relevant, others will not be accessed.
1636 - If no ``index_list`` is provided, both ``tarobj`` and
1637 ``backup_path`` must be passed.
1638 - If ``index_list`` is provided, the values of ``tarobj`` and
1639 ``backup_path`` are ignored.
ea6d3c3e
ERE
1640 '''
1641 self._data = []
0501fe0a 1642 self._directories = []
ea6d3c3e
ERE
1643 self._deltatar = deltatar
1644 self._cwd = cwd
3031b7ae 1645 self._password = deltatar.password
1f3fd7b0 1646 self._crypto_key = deltatar.crypto_key
3031b7ae 1647 self._decryptors = []
e93f83f1 1648 self._disaster = disaster
ea6d3c3e 1649
253d4cdd
ERE
1650 try:
1651 import grp, pwd
1652 except ImportError:
1653 grp = pwd = None
1654
1655 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1656 self.canchown = True
1657 else:
1658 self.canchown = False
1659
65b35c42 1660 if isinstance (backup_index, list) is True:
001bd488 1661 decryptor = self._deltatar.decryptor
65b35c42
PG
1662 self._data = \
1663 [{ "curr_vol_no" : None
1664 , "vol_fd" : None
1665 , "offset" : -1
1666 , "tarobj" : None
1667 , "path" : backup_path
1668 , "is_full" : True
1669 , "iterator" : None
1670 , "last_itelement" : None
1671 , "last_lno" : 0
001bd488
PG
1672 , "new_volume_handler" :
1673 partial(self.new_volume_handler,
1674 self._deltatar, self._cwd, True,
1675 os.path.dirname(backup_path), decryptor)
1676 , "decryptor" : decryptor
65b35c42
PG
1677 }]
1678 elif index_list is not None:
24ddf0a2 1679 for index in index_list:
037994ca 1680 is_full = index == index_list[-1]
24ddf0a2 1681
d5e1d60f 1682 decryptor = None
3031b7ae 1683 if self._password is not None:
1f3fd7b0
PG
1684 decryptor = crypto.Decrypt (password=self._password,
1685 key=self._crypto_key)
d5e1d60f 1686
24ddf0a2
ERE
1687 # make paths absolute to avoid cwd problems
1688 if not os.path.isabs(index):
1689 index = os.path.normpath(os.path.join(cwd, index))
1690
1691 s = dict(
1692 curr_vol_no = None,
1693 vol_fd = None,
1694 offset = -1,
1695 tarobj = None,
1696 path = index,
1697 is_full = is_full,
1698 iterator = None,
1699 last_itelement = None,
1700 last_lno = 0,
1701 new_volume_handler = partial(self.new_volume_handler,
1702 self._deltatar, self._cwd, is_full,
d5e1d60f
PG
1703 os.path.dirname(index), decryptor),
1704 decryptor = decryptor
24ddf0a2
ERE
1705 )
1706 self._data.append(s)
1707 else:
ea6d3c3e 1708 # make paths absolute to avoid cwd problems
24ddf0a2
ERE
1709 if not os.path.isabs(backup_path):
1710 backup_path = os.path.normpath(os.path.join(cwd, backup_path))
ea6d3c3e 1711
ec57ce53
ERE
1712 # update the new_volume_handler of tar_obj
1713 tarobj.new_volume_handler = partial(self.new_volume_handler,
b7c47f38 1714 self._deltatar, self._cwd, True, os.path.dirname(backup_path),
133d30da 1715 self._deltatar.decryptor)
ea6d3c3e
ERE
1716 s = dict(
1717 curr_vol_no = None,
1718 vol_fd = None,
1719 offset = -1,
24ddf0a2
ERE
1720 tarobj = tarobj,
1721 path = backup_path,
1722 is_full = True,
670f9934
ERE
1723 iterator = None,
1724 last_itelement = None,
1725 last_lno = 0,
d5e1d60f
PG
1726 new_volume_handler = tarobj.new_volume_handler,
1727 decryptor = self._deltatar.decryptor
ea6d3c3e
ERE
1728 )
1729 self._data.append(s)
1730
3031b7ae 1731
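    # Example of the ordering assumption (paths are hypothetical): for one full
    # backup followed by one diff backup, the caller passes
    #
    #     index_list = ["/backups/diff-2014-02/index",    # newest first -> _data[0]
    #                   "/backups/full-2014-01/index"]    # full backup  -> _data[-1]
    #
    # so that _data[0] is the diff index and _data[1] the full index, matching
    # what restore() expects below.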
ea6d3c3e
ERE
1732 def cleanup(self):
1733 '''
1734 Closes all open files
1735 '''
1736 for data in self._data:
55b2ffd0
ERE
1737 if data['vol_fd']:
1738 data['vol_fd'].close()
1739 data['vol_fd'] = None
ea6d3c3e
ERE
1740 if data['tarobj']:
1741 data['tarobj'].close()
1742 data['tarobj'] = None
ea6d3c3e
ERE
1743
1744 def delete(self, path):
1745 '''
1746 Delete a file
1747 '''
df99a044
ERE
1748 if not os.path.exists(path):
1749 return
1750
24ddf0a2 1751 # to preserve parent directory mtime, we save it
283fbd5e 1752 parent_dir = os.path.dirname(path) or os.getcwd()
24ddf0a2
ERE
1753 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1754
561bc39f 1755 if os.path.isdir(path) and not os.path.islink(path):
ea6d3c3e
ERE
1756 shutil.rmtree(path)
1757 else:
1758 os.unlink(path)
1759
24ddf0a2
ERE
1760 # now we restore parent_directory mtime
1761 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1762
4da27cfe 1763 def restore(self, itpath, l_no, callback=None):
ea6d3c3e 1764 '''
8a54d5dd 1765 Restore the path from the appropriate backup. Receives the current path
e8d95fe5 1766         from the newest (=first) index iterator. itpath must not be None.
b0aef801 1767 callback is a custom function that gets called for every file.
037994ca
PG
1768
1769 NB: This function takes the attribute ``_data`` as input but will only
1770 ever use its first and, if available, second element. Anything else in
1771 ``._data[]`` will be ignored.
ea6d3c3e 1772 '''
ea6d3c3e
ERE
1773 path = itpath['path']
1774
4da27cfe
SA
1775 # Calls the callback function
1776 if callback:
1777 callback()
1778
ea6d3c3e 1779 if path.startswith('delete://'):
df86af81
ERE
1780             # the file has already been deleted in restore_backup in all
1781             # cases, so there is nothing left to do
ea6d3c3e 1782 return
df86af81 1783
e8d95fe5 1784 # get data from newest index (_data[0])
df86af81
ERE
1785 data = self._data[0]
1786 upath = self._deltatar.unprefixed(path)
1787
24ddf0a2 1788 # to preserve parent directory mtime, we save it
283fbd5e 1789 parent_dir = os.path.dirname(upath) or os.getcwd()
ec57ce53
ERE
1790 if not os.path.exists(parent_dir):
1791 os.makedirs(parent_dir)
24ddf0a2
ERE
1792 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1793
e8d95fe5 1794         # if the path appears in the newest index as a snapshot entry, deal with it
df86af81
ERE
1795 # and finish
1796 if path.startswith('snapshot://'):
65b35c42 1797 self.restore_file(itpath, data, path, l_no, upath)
24ddf0a2
ERE
1798
1799 # now we restore parent_directory mtime
1800 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
ea6d3c3e
ERE
1801 return
1802
1803 # we go from index to index, finding the path in the index, then finding
1804 # the index with the most recent snapshot of the file being restored
e8d95fe5
TJ
1805 #
1806         # Right now we only support diff backups, not incremental backups.
1807 # As a result _data[0] is always the diff backup index
1808 # and _data[1] the full backup index.
527670c4 1809 if len(self._data) == 2:
7273719c 1810 data = self._data[1]
527670c4
TJ
1811 d, l_no, dpath = self.find_path_in_index(data, upath)
1812 if not d:
1813 self._deltatar.logger.warning('Error restoring file %s from '
1814 'index, not found in index %s' % (path, data['path']))
1815 return
1816
1817 cur_path = d.get('path', '')
1818 if cur_path.startswith('delete://'):
1819                 self._deltatar.logger.warning(('Unexpected state: file '
1820                     '%s was listed in the first index but deleted by another '
1821                     'one. Path was ignored and left untouched.') % path)
1822 return
1823 elif cur_path.startswith('snapshot://'):
1824 # this code path is reached when the file is unchanged
1825 # in the newest index and therefore of type 'list://'
1826 self.restore_file(d, data, path, l_no, dpath)
1827
1828 # now we restore parent_directory mtime
1829 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1830 return
1831
1832 # error code path is reached when:
1833 # a) we have more than two indexes (unsupported atm)
1834 # b) both indexes contain a list:// entry (logic error)
1835 # c) we have just one index and it also contains list://
4bda6f45 1836 self._deltatar.logger.warning(('Error restoring file %s from index, '
ea6d3c3e
ERE
1837 'snapshot not found in any index') % path)
1838
670f9934
ERE
1839 def find_path_in_index(self, data, upath):
1840         # NOTE: we sometimes restart the iterator because it can be
1841         # walked over completely multiple times, for example if one path is not
1842         # found in one index and we have to go to the next index.
7273719c
PG
1843 it = data['iterator']
1844 if it is None:
670f9934 1845 it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
be60ffd0 1846 d, l_no = it.__next__()
670f9934 1847 else:
670f9934
ERE
1848 d = data['last_itelement']
1849 l_no = data['last_lno']
1850
670f9934 1851 while True:
7273719c 1852 dpath = self._deltatar.unprefixed(d.get('path', ''))
670f9934
ERE
1853 if upath == dpath:
1854 data['last_itelement'] = d
1855 data['last_lno'] = l_no
1856 return d, l_no, dpath
1857
1858 up, dp = self._deltatar.compare_indexes(upath, dpath)
1859 # any time upath should have appeared before current dpath, it means
1860 # upath is just not in this index and we should stop
1861 if dp is None:
1862 data['last_itelement'] = d
1863 data['last_lno'] = l_no
1864 return None, 0, ''
1865
1866 try:
be60ffd0 1867 d, l_no = it.__next__()
670f9934
ERE
1868 except StopIteration:
1869 data['last_itelement'] = d
1870 data['last_lno'] = l_no
1871 return None, 0, ''
670f9934 1872
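    # Resumption sketch: because both the index entries and the lookups arrive
    # in the same sorted order, a later call can continue from the position
    # saved in data['last_itelement'] / data['last_lno'] instead of rescanning
    # the whole index (illustrative paths):
    #
    #     d1, l1, _ = helper.find_path_in_index(data, "./etc/fstab")
    #     d2, l2, _ = helper.find_path_in_index(data, "./etc/hosts")
    #     # the second call resumes after "./etc/fstab" rather than at line 0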
0501fe0a
ERE
1873 def restore_directories_permissions(self):
1874 '''
1875         Restore directory permissions once everything else has been restored
1876 '''
42c04ead
ERE
1877 try:
1878 import grp, pwd
1879 except ImportError:
1880 grp = pwd = None
1881
0501fe0a
ERE
1882 self._directories.sort(key=operator.attrgetter('name'))
1883 self._directories.reverse()
0501fe0a
ERE
1884
1885 # Set correct owner, mtime and filemode on directories.
1886 for member in self._directories:
1887 dirpath = member.name
1888 try:
42c04ead
ERE
1889 os.chmod(dirpath, member.mode)
1890 os.utime(dirpath, (member.mtime, member.mtime))
253d4cdd 1891 if self.canchown:
42c04ead
ERE
1892 # We have to be root to do so.
1893 try:
1894 g = grp.getgrnam(member.gname)[2]
1895 except KeyError:
1896 g = member.gid
1897 try:
1898 u = pwd.getpwnam(member.uname)[2]
1899 except KeyError:
1900 u = member.uid
1901 try:
4e433e00 1902 if member.issym and hasattr(os, "lchown"):
42c04ead
ERE
1903 os.lchown(dirpath, u, g)
1904 else:
1905 os.chown(dirpath, u, g)
1906 except EnvironmentError:
1907 raise tarfile.ExtractError("could not change owner")
1908
be60ffd0 1909 except tarfile.ExtractError as e:
4bda6f45 1910 self._deltatar.logger.warning('tarfile: %s' % e)
0501fe0a 1911
df86af81 1912 @staticmethod
b7c47f38 1913 def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
ea6d3c3e
ERE
1914 '''
1915 Handles the new volumes
1916 '''
df86af81
ERE
1917 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1918 volume_number, guess_name=True)
ea6d3c3e
ERE
1919 volume_path = os.path.join(backup_path, volume_name)
1920
1921 # we convert relative paths into absolute because CWD is changed
1922 if not os.path.isabs(volume_path):
1923 volume_path = os.path.join(cwd, volume_path)
b7c47f38 1924 tarobj.open_volume(volume_path, encryption=encryption)
ea6d3c3e 1925
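    # Binding sketch: the handler is registered through functools.partial so the
    # tarfile layer only has to supply the trailing (tarobj, base_name,
    # volume_number) arguments when it hits a volume boundary (this mirrors the
    # partial(...) calls in __init__ above):
    #
    #     handler = partial(RestoreHelper.new_volume_handler,
    #                       deltatar, cwd, True, os.path.dirname(backup_path),
    #                       decryptor)
    #     # tarfile later calls: handler(tarobj, base_name, volume_number)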
253d4cdd 1926 def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
ea6d3c3e
ERE
1927 '''
1928 Restores a snapshot of a file from a specific backup
1929 '''
ea6d3c3e 1930 op_type = file_data.get('type', -1)
24ddf0a2 1931 member = file_data.get('member', None)
9f9ae874 1932 ismember = bool(member)
24ddf0a2
ERE
1933
1934         # when member is set, we can assume everything is in place and we
1935         # just have to restore the path
a2a37de7 1936 if member is None:
24ddf0a2
ERE
1937 vol_no = file_data.get('volume', -1)
1938 # sanity check
1939 if not isinstance(vol_no, int) or vol_no < 0:
4bda6f45 1940 self._deltatar.logger.warning('unrecognized type to be restored: '
24ddf0a2
ERE
1941 '%s, line %d' % (op_type, l_no))
1942
1943         # set up the volume that needs to be read; only needed when member is
1944 # not set
a2a37de7 1945 if index_data['curr_vol_no'] != vol_no:
24ddf0a2
ERE
1946 index_data['curr_vol_no'] = vol_no
1947 backup_path = os.path.dirname(index_data['path'])
1948 vol_name = self._deltatar.volume_name_func(backup_path,
1949 index_data['is_full'], vol_no, guess_name=True)
1950 vol_path = os.path.join(backup_path, vol_name)
1951 if index_data['vol_fd']:
1952 index_data['vol_fd'].close()
be60ffd0 1953 index_data['vol_fd'] = open(vol_path, 'rb')
24ddf0a2
ERE
1954
1955 # force reopen of the tarobj because of new volume
1956 if index_data['tarobj']:
1957 index_data['tarobj'].close()
1958 index_data['tarobj'] = None
1959
1960 # seek tarfile if needed
1961 offset = file_data.get('offset', -1)
ea6d3c3e 1962 if index_data['tarobj']:
c52fd26b 1963 if self._disaster == tarfile.TOLERANCE_RESCUE:
24ddf0a2
ERE
1964 # force a seek and reopen
1965 index_data['tarobj'].close()
1966 index_data['tarobj'] = None
c52fd26b
PG
1967 else:
1968 try:
1969 member = index_data['tarobj'].__iter__().__next__()
1970 except tarfile.DecryptionError:
1971 pass
1972 except tarfile.CompressionError:
1973 pass
1974
1975 if not member or member.path != file_data['path']:
1976 # force a seek and reopen
1977 index_data['tarobj'].close()
1978 index_data['tarobj'] = None
1979
24ddf0a2
ERE
1980
1981 # open the tarfile if needed
1982 if not index_data['tarobj']:
1983 index_data['vol_fd'].seek(offset)
1984 index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
1985 fileobj=index_data['vol_fd'],
1986 format=tarfile.GNU_FORMAT,
d1c38f40 1987 concat='#' in self._deltatar.mode,
d5e1d60f 1988 encryption=index_data["decryptor"],
253d4cdd 1989 new_volume_handler=index_data['new_volume_handler'],
044585c6 1990 save_to_members=False,
04f4c7ab 1991 tolerance=self._disaster)
24ddf0a2 1992
be60ffd0 1993 member = index_data['tarobj'].__iter__().__next__()
ea6d3c3e 1994
253d4cdd
ERE
1995 member.path = unprefixed_path
1996 member.name = unprefixed_path
0501fe0a
ERE
1997
1998 if op_type == 'directory':
253d4cdd 1999 self.add_member_dir(member)
0501fe0a 2000 member = copy.copy(member)
be60ffd0 2001 member.mode = 0o0700
0501fe0a 2002
df86af81
ERE
2003             # if it's an existing directory, we don't need to recreate it;
2004             # just set the right permissions, mtime and related metadata
2005 if os.path.exists(member.path):
2006 return
2007
9f9ae874 2008 if not ismember:
24ddf0a2
ERE
2009 # set current volume number in tarobj, otherwise the extraction of the
2010 # file might fail when trying to extract a multivolume member
2011 index_data['tarobj'].volume_number = index_data['curr_vol_no']
86a6e741 2012
9b13f5c4
PG
2013 def ignore_symlink (member, *_args):
2014 self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
786addd6 2015
ea6d3c3e 2016 # finally, restore the file
9b13f5c4 2017 index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)
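    # Seek-and-reopen sketch (simplified from the logic above; the volume path
    # and mode are hypothetical, and encryption is omitted):
    #
    #     vol_fd = open("/backups/backup-full-2014-01-01/volume-1.tar.gz", "rb")
    #     vol_fd.seek(file_data["offset"])
    #     tarobj = tarfile.open(mode="r#gz", fileobj=vol_fd,
    #                           format=tarfile.GNU_FORMAT, concat=True,
    #                           new_volume_handler=index_data["new_volume_handler"],
    #                           save_to_members=False)
    #     member = tarobj.__iter__().__next__()   # the member stored at that offset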
253d4cdd
ERE
2018
2019 def add_member_dir(self, member):
2020 '''
2021 Add member dir to be restored at the end
2022 '''
4e433e00 2023 if not self.canchown:
253d4cdd
ERE
2024 self._directories.append(DirItem(name=member.name, mode=member.mode,
2025 mtime=member.mtime))
2026 else:
2027 self._directories.append(DirItem(name=member.name, mode=member.mode,
2028 mtime=member.mtime, gname=member.gname, uname=member.uname,
4e433e00 2029 uid=member.uid, gid=member.gid, issym=member.issym()))
253d4cdd
ERE
2030
2031class DirItem(object):
2032 def __init__(self, **kwargs):
be60ffd0 2033 for k, v in kwargs.items():
9f9ae874 2034 setattr(self, k, v)