add iterator mode for reconstructed index
[python-delta-tar] / deltatar / deltatar.py
6b2fa38f 1#!/usr/bin/env python3
0708a374 2
51797cd6 3# Copyright (C) 2013, 2014 Intra2net AG
0708a374
ERE
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; either version 3 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU Lesser General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see
17# <http://www.gnu.org/licenses/lgpl-3.0.html>
18
19# Author: Eduardo Robles Elvira <edulix@wadobo.com>
20
938c2d54
PG
21DELTATAR_HEADER_VERSION = 1
22DELTATAR_PARAMETER_VERSION = 1
3fdea6d4 23
0708a374
ERE
24import logging
25import datetime
6c678f3a 26import binascii
938c2d54 27import io
0501fe0a 28import operator
0708a374 29import os
0501fe0a 30import copy
82de3376 31import shutil
8a8fadda 32import re
e82f14f5
ERE
33import stat
34import json
0708a374
ERE
35from functools import partial
36
37from . import tarfile
2ae46844 38from . import crypto
0708a374 39
0708a374
ERE
40class NullHandler(logging.Handler):
41 def emit(self, record):
42 pass
24ddf0a2
ERE
43
44
0708a374
ERE
45logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
46
974408b5
ERE
47
48# match mode
49NO_MATCH = False
50MATCH = True
51PARENT_MATCH = 2
52
133d30da
PG
53# encryption direction
54CRYPTO_MODE_ENCRYPT = 0
55CRYPTO_MODE_DECRYPT = 1
56
13cc7dfc
PG
57# The canonical extension for encrypted backup files regardless of the actual
58# encryption parameters is “.pdtcrypt”. This is analogous to the encryption
59# header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
60# Since the introduction of the versioned header there is no longer any need
61# for encoding encryption parameters in the file extensions (“.aes128” and
62# suchlike).
63PDTCRYPT_EXTENSION = "pdtcrypt"
2cdd9faf
PG
64PDT_TYPE_ARCHIVE = 0
65PDT_TYPE_AUX = 1
13cc7dfc 66
9eccb1c2
PG
67AUXILIARY_FILE_INDEX = 0
68AUXILIARY_FILE_INFO = 1
69
0708a374
ERE
70class DeltaTar(object):
71 '''
72 Backup class used to create backups
73 '''
74
75 # list of files to exclude in the backup creation or restore operation. It
76 # can contain python regular expressions.
77 excluded_files = []
78
79 # list of files to include in the backup creation or restore operation. It
80 # can contain python regular expressions. If empty, all files in the source
81 # path will be backed up (when creating a backup) or all the files in the
a83fa4ed 82 # backup will be restored (when restoring a backup), but if included_files
0708a374
ERE
83 # is set then only the files included in the list will be processed.
84 included_files = []
85
86 # custom filter of files to be backed up (or restored). Unused and unset
87 # by default. The function receives a file path and must return a boolean.
88 filter_func = None
89
da26094a
ERE
90 # mode in which the delta will be created (when creating a backup) or
91 # opened (when restoring). Accepts modes analogous to the tarfile library.
92 mode = ""
0708a374
ERE
93
94 # used together with aes modes to encrypt and decrypt backups.
95 password = None
1f3fd7b0
PG
96 crypto_key = None
97 nacl = None
0708a374 98
dbee011c
PG
99 # parameter version to use when encrypting; note that this has no effect
100 # on decryption since the required settings are determined from the headers
54f909ca 101 crypto_version = DELTATAR_HEADER_VERSION
dbee011c
PG
102 crypto_paramversion = None
103
133d30da 104 # when encrypting or decrypting, these hold crypto handlers; created before
2ae46844 105 # establishing the Tarfile stream iff a password or crypto key is supplied.
133d30da
PG
106 encryptor = None
107 decryptor = None
2ae46844 108
0708a374
ERE
109 # python logger object.
110 logger = None
111
3a7e1a50
ERE
112 # specifies the index mode in the same format as @param mode, but without
113 # the ':', '|' or '#' at the beginning. It doesn't make sense to specify
2ae46844 114 # that the index is encrypted if no password is given in the constructor.
3a7e1a50 115 index_mode = None
0708a374
ERE
116
117 # current time for this backup. Used for file names and file creation checks
118 current_time = None
119
9eae9a1f
ERE
120 # extra data to be included in the header of the index file when creating a
121 # backup
122 extra_data = dict()
123
0708a374
ERE
124 # valid tarfile modes and their corresponding default file extension
125 __file_extensions_dict = {
da26094a
ERE
126 '': '',
127 ':': '',
128 ':gz': '.gz',
129 ':bz2': '.bz2',
130 '|': '',
131 '|gz': '.gz',
132 '|bz2': '.bz2',
133 '#gz': '.gz',
6e99d23a
PG
134 '#gz.pdtcrypt': '.gz',
135 '#pdtcrypt': '',
d1c38f40 136 '#': '',
0708a374
ERE
137 }
138
3a7e1a50
ERE
139 # valid index modes and their corresponding default file extension
140 __index_extensions_dict = {
141 '': '',
142 'gz': '.gz',
143 'bz2': '.bz2',
6e99d23a
PG
144 'gz.pdtcrypt': '.gz',
145 'pdtcrypt': '',
3a7e1a50
ERE
146 }
147
8adbe50d
ERE
148 # valid path prefixes
149 __path_prefix_list = [
150 u'snapshot://',
151 u'list://',
152 u'delete://'
153 ]
154
0708a374 155 def __init__(self, excluded_files=[], included_files=[],
da26094a 156 filter_func=None, mode="", password=None,
1f3fd7b0 157 crypto_key=None, nacl=None,
54f909ca 158 crypto_version=DELTATAR_HEADER_VERSION,
dbee011c 159 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
3a7e1a50 160 logger=None, index_mode=None, index_name_func=None,
0708a374
ERE
161 volume_name_func=None):
162 '''
163 Constructor. Configures the diff engine.
164
165 Parameters:
166 - excluded_files: list of files to exclude in the backup creation or
167 restore operation. It can contain python regular expressions.
168
169 - included_files: list of files to include in the backup creation or
170 restore operation. It can contain python regular expressions. If
171 empty, all files in the source path will be backed up (when creating a
172 backup) or all the files in the backup will be restored (when
a83fa4ed 173 restoring a backup), but if included_files is set then only the files
0708a374
ERE
174 included in the list will be processed.
175
176 - filter_func: custom filter of files to be backed up (or restored).
177 Unused and unset by default. The function receives a file path and
178 must return a boolean.
179
180 - mode: mode in which the delta will be created (when creating a backup)
181 or opened (when restoring). Accepts the same modes as the tarfile
182 library. Valid modes are:
183
da26094a
ERE
184 '' open uncompressed
185 ':' open uncompressed
186 ':gz' open with gzip compression
187 ':bz2' open with bzip2 compression
188 '|' open an uncompressed stream of tar blocks
189 '|gz' open a gzip compressed stream of tar blocks
190 '|bz2' open a bzip2 compressed stream of tar blocks
191 '#gz' open a stream of gzip compressed tar blocks
0708a374 192
1f3fd7b0
PG
193 - crypto_key: used to encrypt and decrypt backups. Encryption will
194 be enabled automatically if a key is supplied. Requires a salt to be
195 passed as well.
196
197 - nacl: salt that was used to derive the encryption key for embedding
198 in the PDTCRYPT header. Not needed when decrypting and when
199 encrypting with password.
200
6e99d23a
PG
201 - password: used to encrypt and decrypt backups. Encryption will be
202 enabled automatically if a password is supplied.
0708a374 203
54f909ca
PG
204 - crypto_version: version of the format, determining the kind of PDT
205 object header.
206
dbee011c
PG
207 - crypto_paramversion: optionally request encryption conforming to
208 a specific parameter version. Defaults to the standard PDT value
209 which as of 2017 is the only one available.
210
0708a374
ERE
211 - logger: python logger object. Optional.
212
3a7e1a50 213 - index_mode: specifies the index mode in the same format as @param
6e99d23a
PG
214 mode, but without the ':', '|' or '#' at the beginning. If encryption
215 is requested it will extend to the auxiliary (index, info) files as
216 well. This is an optional parameter that will automatically mimic
217 @param mode by default if not provided. Valid modes are:
3a7e1a50
ERE
218
219 '' open uncompressed
220 'gz' open with gzip compression
221 'bz2' open with bzip2 compression
0708a374
ERE
222
223 - index_name_func: function that sets a custom name for the index file.
2cc6e32b
PG
224 This function receives a flag to indicate whether the name will be
225 used for a full or diff backup. The backup path will be prepended to
226 its return value.
0708a374
ERE
227
228 - volume_name_func: function that defines the name of tar volumes. It
229 receives the backup_path, if it's a full backup and the volume number,
230 and must return the name for the corresponding volume. Optional,
231 DeltaTar has default names for tar volumes.
232 '''
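        # A construction sketch (not part of the library; the mode, password
        # and exclusion list below are hypothetical choices):
        #
        #     from deltatar.deltatar import DeltaTar
        #     dtar = DeltaTar(mode='#gz', password='s3cret',
        #                     excluded_files=['tmp/'], index_mode='gz')
        #
        # '#gz' writes the volumes as concatenated gzip blocks, which is the
        # layout required later for index-based restores.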
233
da26094a 234 if mode not in self.__file_extensions_dict:
8a54d5dd
PG
235 raise Exception('Unrecognized extension mode=[%s] requested for files'
236 % str(mode))
0708a374
ERE
237
238 self.excluded_files = excluded_files
239 self.included_files = included_files
240 self.filter_func = filter_func
241 self.logger = logging.getLogger('deltatar.DeltaTar')
242 if logger:
243 self.logger.addHandler(logger)
244 self.mode = mode
2ae46844 245
1f3fd7b0
PG
246 if crypto_key is not None:
247 self.crypto_key = crypto_key
248 self.nacl = nacl # encryption only
249
2ae46844
PG
250 if password is not None:
251 self.password = password
3a7e1a50 252
54f909ca
PG
253 if crypto_version is not None:
254 self.crypto_version = crypto_version
255
dbee011c
PG
256 if crypto_paramversion is not None:
257 self.crypto_paramversion = crypto_paramversion
258
3a7e1a50
ERE
259 # generate index_mode
260 if index_mode is None:
261 index_mode = ''
6e99d23a 262 if 'gz' in mode:
3a7e1a50
ERE
263 index_mode = "gz"
264 elif 'bz2' in mode:
265 index_mode = "bz2"
266 elif mode not in self.__index_extensions_dict:
8a54d5dd
PG
267 raise Exception('Unrecognized extension mode=[%s] requested for index'
268 % str(mode))
3a7e1a50
ERE
269
270 self.index_mode = index_mode
0708a374
ERE
271 self.current_time = datetime.datetime.now()
272
273 if index_name_func is not None:
274 self.index_name_func = index_name_func
275
276 if volume_name_func is not None:
277 self.volume_name_func = volume_name_func
278
e54cfec5 279 def pick_extension(self, kind, mode=None):
2cdd9faf
PG
280 """
281 Choose the extension depending on a) the kind of file given, b) the
282 processing mode, and c) the current encryption settings.
283 """
284 ret = ""
285 if kind == PDT_TYPE_ARCHIVE:
286 ret += ".tar"
e54cfec5
PG
287 if mode is None:
288 mode = self.__index_extensions_dict [self.index_mode]
2cdd9faf 289 ret += mode
a83fa4ed 290 if self.crypto_key is not None or self.password is not None:
2cdd9faf
PG
291 ret += "." + PDTCRYPT_EXTENSION
292 return ret
293
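    # Extension sketch (illustrative): pick_extension(PDT_TYPE_ARCHIVE, '.gz')
    # yields ".tar.gz" and pick_extension(PDT_TYPE_AUX, '') yields "";
    # ".pdtcrypt" is appended to either whenever a password or key is set.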
f0287fb7 294 def index_name_func(self, is_full): # pylint: disable=method-hidden
0708a374 295 '''
2cc6e32b
PG
296 Callback for setting a custom name for the index file. Depending on
297 whether *is_full* is set, it will create a suitable name for a full
298 or a diff backup.
0708a374
ERE
299 '''
300 prefix = "bfull" if is_full else "bdiff"
f7940c31 301 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf
PG
302 extension = self.pick_extension \
303 (PDT_TYPE_AUX,
304 self.__index_extensions_dict [self.index_mode])
0708a374 305
da26094a 306 return "%s-%s.index%s" % (prefix, date_str, extension)
0708a374 307
f0287fb7
CH
308 def volume_name_func(self, backup_path, # pylint: disable=method-hidden
309 is_full, volume_number,
310 guess_name=False):
0708a374
ERE
311 '''
312 function that defines the name of tar volumes. It receives the
313 backup_path, if it's a full backup and the volume number, and must return
314 the name for the corresponding volume. Optional, DeltaTar has default
315 names for tar volumes.
df86af81
ERE
316
317 If guess_name is activated, the file is intended not to be created but
318 to be found, and thus the date will be guessed.
0708a374
ERE
319 '''
320 prefix = "bfull" if is_full else "bdiff"
2cdd9faf
PG
321 extension = self.pick_extension \
322 (PDT_TYPE_ARCHIVE,
323 self.__file_extensions_dict [self.mode])
0708a374 324
df86af81 325 if not guess_name:
f7940c31 326 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
2cdd9faf 327 return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
df86af81
ERE
328 else:
329 prefix = prefix + "-"
90b75470 330 postfix = "-%03d%s" % (volume_number + 1, extension)
86a6e741
ERE
331 for f in os.listdir(backup_path):
332 if f.startswith(prefix) and f.endswith(postfix):
333 return f
df86af81
ERE
334 raise Exception("volume not found")
335
0708a374 336
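    # Naming sketch (assuming mode='#gz', no encryption, and a backup taken
    # at 2014-02-05 16:30):
    #
    #     index_name_func(True)             -> "bfull-2014-02-05-1630.index.gz"
    #     volume_name_func(path, True, 0)   -> "bfull-2014-02-05-1630-001.tar.gz"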
974408b5 337 def filter_path(self, path, source_path="", is_dir=None):
8a8fadda
ERE
338 '''
339 Filters a path, given the source_path, using the filtering properties
340 set in the constructor.
341 The filtering order is:
342 1. included_files (if any)
343 2. excluded_files
344 3. filter_func (which must return whether the file is accepted or not)
345 '''
75059f3c 346
c1af2184 347 if len(source_path) > 0:
75059f3c
CH
348 # ensure that exactly one '/' at end of dir is also removed
349 source_path = source_path.rstrip(os.sep) + os.sep
8a8fadda
ERE
350 path = path[len(source_path):]
351
352 # 1. filter included_files
974408b5 353 match = MATCH
8a8fadda 354 if len(self.included_files) > 0:
974408b5 355 match = NO_MATCH
8a8fadda
ERE
356 for i in self.included_files:
357 # it can be either a regexp or a string
be60ffd0 358 if isinstance(i, str):
8a8fadda
ERE
359 # if the string matches, then continue
360 if i == path:
974408b5 361 match = MATCH
c1af2184 362 break
8a8fadda
ERE
363
364 # if the string ends with / it's a directory, and if the
7b07645e 365 # path is contained in it, it is included
c1af2184 366 if i.endswith('/') and path.startswith(i):
974408b5 367 match = MATCH
c1af2184 368 break
8a8fadda
ERE
369
370 # if the string doesn't end with /, add it and do the same
371 # check
c1af2184 372 elif path.startswith(i + '/'):
974408b5 373 match = MATCH
c1af2184 374 break
8a8fadda 375
974408b5
ERE
376 # check for PARENT_MATCH
377 if is_dir:
378 dir_path = path
379 if not dir_path.endswith('/'):
380 dir_path += '/'
381
382 if i.startswith(dir_path):
383 match = PARENT_MATCH
384
8a8fadda
ERE
385 # if it's a reg exp, then we just check if it matches
386 elif isinstance(i, re._pattern_type):
c1af2184 387 if i.match(path):
974408b5 388 match = MATCH
c1af2184 389 break
8a8fadda 390 else:
4bda6f45 391 self.logger.warning('Invalid pattern in included_files: %s' % str(i))
8a8fadda 392
974408b5
ERE
393 if match == NO_MATCH:
394 return NO_MATCH
c1af2184 395
974408b5
ERE
396 # when a directory is in PARENT_MATCH, it doesn't matter if it's
397 # excluded. Its subfiles will be excluded, but the directory itself
398 # won't
399 if match != PARENT_MATCH:
8a8fadda
ERE
400 for e in self.excluded_files:
401 # it can be either a regexp or a string
be60ffd0 402 if isinstance(e, str):
8a8fadda 403 # if the string matches, then exclude
c1af2184 404 if e == path:
974408b5 405 return NO_MATCH
8a8fadda
ERE
406
407 # if the string ends with / it's a directory, and if the
408 # path starts with the directory, then exclude
c1af2184 409 if e.endswith('/') and path.startswith(e):
974408b5 410 return NO_MATCH
8a8fadda
ERE
411
412 # if the string doesn't end with /, do the same check with
413 # the slash added
c1af2184 414 elif path.startswith(e + '/'):
974408b5 415 return NO_MATCH
8a8fadda
ERE
416
417 # if it's a reg exp, then we just check if it matches
c1af2184
ERE
418 elif isinstance(e, re._pattern_type):
419 if e.match(path):
974408b5 420 return NO_MATCH
8a8fadda 421 else:
4bda6f45 422 self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
8a8fadda
ERE
423
424 if self.filter_func:
425 return self.filter_func(path)
426
974408b5 427 return match
8a8fadda 428
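    # Filtering sketch (hypothetical patterns): with included_files=['home/']
    # and excluded_files=['home/user/.cache/'],
    #
    #     filter_path('/src/home/user/file', '/src')       -> MATCH
    #     filter_path('/src/home/user/.cache/x', '/src')   -> NO_MATCH
    #     filter_path('/src/home', '/src', is_dir=True)    -> PARENT_MATCH
    #     filter_path('/src/etc/passwd', '/src')           -> NO_MATCH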
283fbd5e 429 def _recursive_walk_dir(self, source_path, keep_base_dir=False):
0708a374
ERE
430 '''
431 Walk a directory recursively, yielding each file/directory
0708a374
ERE
432 '''
433
283fbd5e 434 source_path = source_path.rstrip(os.sep)
0708a374 435
283fbd5e 436 if keep_base_dir:
adf7dac4 437 beginning_size = 0
283fbd5e
CH
438 else:
439 beginning_size = len(source_path) + 1 # +1 for os.sep
440
441 queue = [source_path]
442
d07c8065 443 while queue:
df86af81 444 cur_path = queue.pop(0)
0708a374 445
d86735e4
ERE
446 # it might have been removed in the mean time
447 if not os.path.exists(cur_path):
448 continue
449
7dec665c
CH
450 for filename in sorted(os.listdir(cur_path)):
451 child = os.path.join(cur_path, filename)
d07c8065
ERE
452 is_dir = os.path.isdir(child)
453 status = self.filter_path(child, source_path, is_dir)
7dec665c
CH
454 if status == NO_MATCH:
455 continue
456 if not os.access(child, os.R_OK):
4bda6f45 457 self.logger.warning('Error accessing possibly locked file %s' % child)
7dec665c 458 continue
8a8fadda 459
d07c8065 460 if status == MATCH:
adf7dac4 461 yield child[beginning_size:]
0708a374 462
d07c8065
ERE
463 if is_dir and (status == MATCH or status == PARENT_MATCH):
464 queue.append(child)
0708a374 465
e82f14f5
ERE
466 def _stat_dict(self, path):
467 '''
468 Returns a dict with the stat data used to compare files
469 '''
470 stinfo = os.stat(path)
471 mode = stinfo.st_mode
472
473 ptype = None
474 if stat.S_ISDIR(mode):
d07c8065 475 ptype = u'directory'
e82f14f5 476 elif stat.S_ISREG(mode):
d07c8065 477 ptype = u'file'
e82f14f5 478 elif stat.S_ISLNK(mode):
d07c8065 479 ptype = u'link'
e82f14f5
ERE
480
481 return {
d07c8065 482 u'type': ptype,
be60ffd0 483 u'path': path,
d07c8065 484 u'mode': mode,
0501fe0a
ERE
485 u'mtime': int(stinfo.st_mtime),
486 u'ctime': int(stinfo.st_ctime),
d07c8065
ERE
487 u'uid': stinfo.st_uid,
488 u'gid': stinfo.st_gid,
489 u'inode': stinfo.st_ino,
490 u'size': stinfo.st_size
e82f14f5
ERE
491 }
492
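    # A stat dict as produced above looks roughly like this (values are
    # illustrative only):
    #
    #     {u'type': u'file', u'path': 'home/user/file', u'mode': 33188,
    #      u'mtime': 1391613665, u'ctime': 1391613665, u'uid': 1000,
    #      u'gid': 1000, u'inode': 131077, u'size': 2048}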
df99a044 493 def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
d07c8065
ERE
494 '''
495 Return whether the dicts are equal in the stat keys
496 '''
fc8fdcbc 497 keys = [u'type', u'mode', u'size', u'mtime',
d041935c 498 # not restored: u'inode', u'ctime'
df99a044 499 ]
8adbe50d 500
fc8fdcbc 501 # only if user is root, then also check gid/uid. otherwise do not check it,
d041935c 502 # because tarfile can chown in case of being superuser only
50d70ca9
PG
503 #
504 # also, skip the check in rpmbuild since the sources end up with the
505 # uid:gid of the packager while the extracted files are 0:0.
506 if hasattr(os, "geteuid") and os.geteuid() == 0 \
507 and os.getenv ("RPMBUILD_OPTIONS") is None:
fc8fdcbc
ERE
508 keys.append('gid')
509 keys.append('uid')
510
ea6d3c3e 511 if (not d1 and d2 != None) or (d1 != None and not d2):
8adbe50d
ERE
512 return False
513
cbac9f0b
ERE
514 if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
515 return False
8adbe50d 516
fc8fdcbc
ERE
517 type = d1.get('type', '')
518
d07c8065 519 for key in keys:
fc8fdcbc
ERE
520 # size doesn't matter for directories
521 if type == 'directory' and key == 'size':
522 continue
d07c8065
ERE
523 if d1.get(key, -1) != d2.get(key, -2):
524 return False
525 return True
526
df99a044 527 def prefixed(self, path, listsnapshot_equal=False):
8adbe50d
ERE
528 '''
529 if a path is not prefixed, return it prefixed
530 '''
531 for prefix in self.__path_prefix_list:
532 if path.startswith(prefix):
df99a044
ERE
533 if listsnapshot_equal and prefix == u'list://':
534 return u'snapshot://' + path[len(prefix):]
8adbe50d
ERE
535 return path
536 return u'snapshot://' + path
537
538 def unprefixed(self, path):
539 '''
540 remove a path prefix if any
541 '''
542 for prefix in self.__path_prefix_list:
543 if path.startswith(prefix):
544 return path[len(prefix):]
545 return path
546
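    # Prefix handling sketch:
    #
    #     prefixed('etc/passwd')               -> u'snapshot://etc/passwd'
    #     prefixed('list://etc/passwd', True)  -> u'snapshot://etc/passwd'
    #     unprefixed('delete://etc/passwd')    -> 'etc/passwd'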
133d30da
PG
547
548 def initialize_encryption (self, mode):
549 password = self.password
1f3fd7b0
PG
550 key = self.crypto_key
551 nacl = self.nacl
133d30da 552
1f3fd7b0 553 if key is None and password is None:
133d30da
PG
554 return
555 if mode == CRYPTO_MODE_ENCRYPT:
1f3fd7b0
PG
556 return crypto.Encrypt (password=password,
557 key=key,
558 nacl=nacl,
54f909ca 559 version=self.crypto_version,
774ca538 560 paramversion=self.crypto_paramversion)
133d30da 561 if mode == CRYPTO_MODE_DECRYPT:
1f3fd7b0 562 return crypto.Decrypt (password=password, key=key)
133d30da
PG
563
564 raise Exception ("invalid encryption mode [%r]" % mode)
565
566
9eccb1c2 567 def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
3a7e1a50 568 '''
9eccb1c2
PG
569 Given the specified configuration, opens a file for reading or writing,
570 inheriting the encryption and compression settings from the backup.
571 Returns a file object ready to use.
3fdea6d4 572
c8c72fe1
PG
573 :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
574 respectively).
575 :type mode: str
774ca538
PG
576 :param kind: Role of the file, see AUXILIARY_FILE_* constants.
577 Both the info and the auxiliary file have a globally
578 unique, constant counter value.
3fdea6d4 579 :type kind: int
3a7e1a50 580 '''
3a7e1a50
ERE
581 if self.index_mode.startswith('gz'):
582 comptype = 'gz'
583 elif self.index_mode.startswith('bz2'):
584 comptype = 'bz2'
585 else:
586 comptype = 'tar'
587
133d30da 588 crypto_ctx = None
6de9444a 589 enccounter = None
133d30da 590 if mode == "w":
774ca538 591 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 592 elif mode == "r":
774ca538 593 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
133d30da 594
3031b7ae
PG
595 if crypto_ctx is not None:
596 if kind == AUXILIARY_FILE_INFO:
597 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
598 elif kind == AUXILIARY_FILE_INDEX:
599 enccounter = crypto.AES_GCM_IV_CNT_INDEX
600 else:
601 raise Exception ("invalid kind of aux file %r" % kind)
602
c8c72fe1 603 sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
3fdea6d4 604 bufsize=tarfile.RECORDSIZE, fileobj=None,
6de9444a 605 encryption=crypto_ctx, enccounter=enccounter)
c8c72fe1
PG
606
607 return sink
608
3a7e1a50 609
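    # Reading an index back through the same compression/encryption settings
    # is then a matter of (hypothetical file name):
    #
    #     f = dtar.open_auxiliary_file('bfull-2014-02-05-1630.index.gz', 'r')
    #     header = f.readline()   # first JSON line of the index
    #
    # which is how iterate_index_path() below consumes index files.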
0708a374 610 def create_full_backup(self, source_path, backup_path,
d4a05db6 611 max_volume_size=None, extra_data=dict()):
0708a374
ERE
612 '''
613 Creates a full backup.
614
615 Parameters:
616 - source_path: source path to the directory to back up.
617 - backup_path: path where the back up will be stored. Backup path will
618 be created if not existent.
d5361dac
ERE
619 - max_volume_size: maximum volume size in megabytes. Used to split the
620 backup in volumes. Optional (won't split in volumes by default).
9eae9a1f
ERE
621 - extra_data: a json-serializable dictionary with information that you
622 want to be included in the header of the index file
0708a374
ERE
623 '''
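        # Typical invocation (a sketch; the paths are hypothetical):
        #
        #     dtar.create_full_backup('/srv/data', '/srv/backups/full',
        #                             max_volume_size=100)
        #
        # splits the archive into 100 MB volumes and writes the index file
        # next to them in /srv/backups/full.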
624 # check input
be60ffd0 625 if not isinstance(source_path, str):
0708a374
ERE
626 raise Exception('Source path must be a string')
627
be60ffd0 628 if not isinstance(backup_path, str):
0708a374
ERE
629 raise Exception('Backup path must be a string')
630
631 if not os.path.exists(source_path) or not os.path.isdir(source_path):
632 raise Exception('Source path "%s" does not exist or is not a '\
633 'directory' % source_path)
634
d07c8065
ERE
635 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
636 max_volume_size < 1):
637 raise Exception('max_volume_size must be a positive integer')
d5361dac
ERE
638 if max_volume_size != None:
639 max_volume_size = max_volume_size*1024*1024
640
9eae9a1f
ERE
641 if not isinstance(extra_data, dict):
642 raise Exception('extra_data must be a dictionary')
643
644 try:
645 extra_data_str = json.dumps(extra_data)
646 except:
647 raise Exception('extra_data is not json-serializable')
648
0708a374
ERE
649 if not os.access(source_path, os.R_OK):
650 raise Exception('Source path "%s" is not readable' % source_path)
651
652 # try to create backup path if needed
653 if not os.path.exists(backup_path):
d4a05db6 654 os.makedirs(backup_path)
0708a374
ERE
655
656 if not os.access(backup_path, os.W_OK):
657 raise Exception('Backup path "%s" is not writeable' % backup_path)
658
659 if source_path.endswith('/'):
660 source_path = source_path[:-1]
661
662 if backup_path.endswith('/'):
663 backup_path = backup_path[:-1]
664
665 # update current time
666 self.current_time = datetime.datetime.now()
667
668 if self.mode not in self.__file_extensions_dict:
669 raise Exception('Unrecognized extension')
670
2ae46844 671 # setup for encrypting payload
774ca538
PG
672 if self.encryptor is None:
673 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
2ae46844 674
0708a374 675 # some initialization
11684b1d 676 self.vol_no = 0
0708a374
ERE
677
678 # generate the first volume name
679 vol_name = self.volume_name_func(backup_path, True, 0)
680 tarfile_path = os.path.join(backup_path, vol_name)
681
774ca538
PG
682 # init index
683 index_name = self.index_name_func(True)
684 index_path = os.path.join(backup_path, index_name)
685 index_sink = self.open_auxiliary_file(index_path, 'w')
e82f14f5 686
d5361dac
ERE
687 cwd = os.getcwd()
688
b7c47f38 689 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
0708a374
ERE
690 '''
691 Handles the new volumes
692 '''
d5361dac
ERE
693 volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
694 volume_path = os.path.join(backup_path, volume_name)
11684b1d 695 deltarobj.vol_no = volume_number
d5361dac
ERE
696
697 # we convert relative paths into absolute because CWD is changed
698 if not os.path.isabs(volume_path):
699 volume_path = os.path.join(cwd, volume_path)
11684b1d 700
8e019196
ERE
701 if tarobj.fileobj is not None:
702 tarobj.fileobj.close()
703
b008f989
ERE
704 deltarobj.logger.debug("opening volume %s" % volume_path)
705
b7c47f38 706 tarobj.open_volume(volume_path, encryption=encryption)
d5361dac
ERE
707
708 # wraps some args from context into the handler
133d30da 709 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
0708a374 710
774ca538 711 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
6c678f3a 712
be60ffd0 713 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
6c678f3a 714 # calculate checksum and write into the stream
c2ffe2ec 715 crc = binascii.crc32(s) & 0xFFFFffff
774ca538 716 index_sink.write(s)
e82f14f5 717
0708a374
ERE
718 # start creating the tarfile
719 tarobj = tarfile.TarFile.open(tarfile_path,
da26094a 720 mode='w' + self.mode,
0708a374 721 format=tarfile.GNU_FORMAT,
d1c38f40 722 concat='#' in self.mode,
133d30da 723 encryption=self.encryptor,
0708a374 724 max_volume_size=max_volume_size,
ea625b04 725 new_volume_handler=new_volume_handler,
e2b59b34
ERE
726 save_to_members=False,
727 dereference=True)
e5c6ca04 728 os.chdir(source_path)
55b8686d
ERE
729
730 # for each file to be in the backup, do:
e82f14f5 731 for path in self._recursive_walk_dir('.'):
55b8686d 732 # calculate stat dict for current file
253d4cdd
ERE
733 statd = self._stat_dict(path)
734 statd['path'] = u'snapshot://' + statd['path']
735 statd['volume'] = self.vol_no
55b8686d
ERE
736
737 # backup file
253d4cdd 738 tarobj.add(path, arcname = statd['path'], recursive=False)
11684b1d 739
55b8686d 740 # retrieve file offset
253d4cdd 741 statd['offset'] = tarobj.get_last_member_offset()
b008f989 742 self.logger.debug("backup %s" % statd['path'])
6c678f3a 743
d041935c 744 # store the stat dict in the index
be60ffd0 745 s = bytes(json.dumps(statd) + '\n', 'UTF-8')
6c678f3a 746 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 747 index_sink.write(s)
e82f14f5 748
be60ffd0 749 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
6c678f3a 750 crc = binascii.crc32(s, crc) & 0xffffffff
774ca538 751 index_sink.write(s)
be60ffd0 752 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
774ca538
PG
753 index_sink.write(s)
754
e5c6ca04 755 os.chdir(cwd)
0708a374 756 tarobj.close()
c8c72fe1 757 index_sink.close (close_fileobj=True)
938c2d54 758
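    # The index written above is a sequence of JSON lines, roughly (abridged,
    # values illustrative):
    #
    #     {"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": {}}
    #     {"type": "BEGIN-FILE-LIST"}
    #     {"type": "file", "path": "snapshot://some/file", "volume": 0, "offset": 512, ...}
    #     {"type": "END-FILE-LIST"}
    #     {"type": "file-list-checksum", "checksum": 1234567890}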
0708a374 759 def create_diff_backup(self, source_path, backup_path, previous_index_path,
d4a05db6 760 max_volume_size=None, extra_data=dict()):
0708a374
ERE
761 '''
762 Creates a differential backup.
763
764 Parameters:
765 - source_path: source path to the directory to back up.
766 - backup_path: path where the back up will be stored. Backup path will
767 be created if not existent.
768 - previous_index_path: index of the previous backup, needed to know
769 which files changed since then.
770 - max_volume_size: maximum volume size in megabytes (MB). Used to split
771 the backup in volumes. Optional (won't split in volumes by default).
3a7e1a50
ERE
772
773 NOTE: previous index is assumed to follow exactly the same format as
774 the index_mode setup in the constructor.
0708a374 775 '''
d07c8065 776 # check/sanitize input
be60ffd0 777 if not isinstance(source_path, str):
d07c8065
ERE
778 raise Exception('Source path must be a string')
779
be60ffd0 780 if not isinstance(backup_path, str):
d07c8065
ERE
781 raise Exception('Backup path must be a string')
782
783 if not os.path.exists(source_path) or not os.path.isdir(source_path):
784 raise Exception('Source path "%s" does not exist or is not a '\
785 'directory' % source_path)
786
9eae9a1f
ERE
787 if not isinstance(extra_data, dict):
788 raise Exception('extra_data must be a dictionary')
789
790 try:
791 extra_data_str = json.dumps(extra_data)
792 except:
793 raise Exception('extra_data is not json-serializable')
794
d07c8065
ERE
795 if not os.access(source_path, os.R_OK):
796 raise Exception('Source path "%s" is not readable' % source_path)
797
798 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
799 max_volume_size < 1):
800 raise Exception('max_volume_size must be a positive integer')
801 if max_volume_size != None:
802 max_volume_size = max_volume_size*1024*1024
803
be60ffd0 804 if not isinstance(previous_index_path, str):
d07c8065
ERE
805 raise Exception('previous_index_path must be a string')
806
807 if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
808 raise Exception('Index path "%s" does not exist or is not a '\
809 'file' % previous_index_path)
810
811 if not os.access(previous_index_path, os.R_OK):
812 raise Exception('Index path "%s" is not readable' % previous_index_path)
813
814 # try to create backup path if needed
815 if not os.path.exists(backup_path):
d4a05db6 816 os.makedirs(backup_path)
d07c8065
ERE
817
818 if not os.access(backup_path, os.W_OK):
819 raise Exception('Backup path "%s" is not writeable' % backup_path)
820
821 if source_path.endswith('/'):
822 source_path = source_path[:-1]
823
824 if backup_path.endswith('/'):
825 backup_path = backup_path[:-1]
826
827 # update current time
828 self.current_time = datetime.datetime.now()
829
830 if self.mode not in self.__file_extensions_dict:
831 raise Exception('Unrecognized extension')
832
2ae46844 833 # setup for encrypting payload
774ca538
PG
834 if self.encryptor is None:
835 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
133d30da 836
d07c8065
ERE
837 # some initialization
838 self.vol_no = 0
839
840 # generate the first volume name
df86af81
ERE
841 vol_name = self.volume_name_func(backup_path, is_full=False,
842 volume_number=0)
d07c8065
ERE
843 tarfile_path = os.path.join(backup_path, vol_name)
844
938c2d54 845 # init index
d07c8065
ERE
846 cwd = os.getcwd()
847
3031b7ae
PG
848 index_name = self.index_name_func(is_full=False)
849 index_path = os.path.join(backup_path, index_name)
850 index_sink = self.open_auxiliary_file(index_path, 'w')
851
d07c8065
ERE
852 def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
853 '''
854 Handles the new volumes
855 '''
df86af81
ERE
856 volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
857 volume_number=volume_number)
d07c8065
ERE
858 volume_path = os.path.join(backup_path, volume_name)
859 deltarobj.vol_no = volume_number
860
861 # we convert relative paths into absolute because CWD is changed
862 if not os.path.isabs(volume_path):
863 volume_path = os.path.join(cwd, volume_path)
864
f624ff3d 865 deltarobj.logger.debug("opening volume %s" % volume_path)
d07c8065
ERE
866 tarobj.open_volume(volume_path)
867
868 # wraps some args from context into the handler
869 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
870
3031b7ae 871 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
d07c8065 872
be60ffd0 873 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
d07c8065 874 # calculate checksum and write into the stream
c2ffe2ec 875 crc = binascii.crc32(s) & 0xFFFFffff
3031b7ae 876 index_sink.write(s)
d07c8065
ERE
877
878 # start creating the tarfile
879 tarobj = tarfile.TarFile.open(tarfile_path,
880 mode='w' + self.mode,
881 format=tarfile.GNU_FORMAT,
d1c38f40 882 concat='#' in self.mode,
133d30da 883 encryption=self.encryptor,
d07c8065 884 max_volume_size=max_volume_size,
ea625b04 885 new_volume_handler=new_volume_handler,
e2b59b34
ERE
886 save_to_members=False,
887 dereference=True)
d07c8065 888
aae127d0
ERE
889
890 # create the iterators, first the previous index iterator, then the
891 # source path directory iterator and collate and iterate them
892 if not os.path.isabs(previous_index_path):
893 previous_index_path = os.path.join(cwd, previous_index_path)
894 index_it = self.iterate_index_path(previous_index_path)
895
d07c8065 896 os.chdir(source_path)
aae127d0
ERE
897 dir_it = self._recursive_walk_dir('.')
898 dir_path_it = self.jsonize_path_iterator(dir_it)
d07c8065 899
df86af81
ERE
900 def pr(path):
901 if not path:
902 return "None"
903 else:
904 return path["path"]
8edb2e3c 905
d07c8065 906 # for each file to be in the backup, do:
df86af81 907 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
aae127d0
ERE
908 action = None
909 # if file is not in the index, it means it's a new file, so we have
910 # to take a snapshot
df86af81 911
aae127d0
ERE
912 if not ipath:
913 action = 'snapshot'
914 # if the file is not in the directory iterator, it means that it has
d041935c 915 # been deleted, so we need to mark it as such
aae127d0
ERE
916 elif not dpath:
917 action = 'delete'
918 # if the file is in both iterators, it means it might have either
919 # not changed (in which case we will just list it in our index but
920 # it will not be included in the tar file), or it might have
e8d95fe5 921 # changed, in which case we will snapshot it.
aae127d0
ERE
922 elif ipath and dpath:
923 if self._equal_stat_dicts(ipath, dpath):
924 action = 'list'
925 else:
926 action = 'snapshot'
927 # TODO: when creating chained backups (i.e. diffing from another
928 # diff), we will need to detect the type of action in the previous
929 # index, because if it was delete and dpath is None, we should
930 # discard the file
931
932 if action == 'snapshot':
933 # calculate stat dict for current file
934 stat = dpath.copy()
be60ffd0 935 stat['path'] = "snapshot://" + dpath['path']
aae127d0
ERE
936 stat['volume'] = self.vol_no
937
50f43227
ERE
938 self.logger.debug("[STORE] %s" % dpath['path'])
939
aae127d0 940 # backup file
8adbe50d 941 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
aae127d0
ERE
942
943 # retrieve file offset
944 stat['offset'] = tarobj.get_last_member_offset()
aae127d0 945 elif action == 'delete':
50f43227 946 path = self.unprefixed(ipath['path'])
aae127d0 947 stat = {
50f43227 948 u'path': u'delete://' + path,
aae127d0
ERE
949 u'type': ipath['type']
950 }
50f43227 951 self.logger.debug("[DELETE] %s" % path)
aae127d0
ERE
952
953 # mark it as deleted in the backup
42d39ca7 954 tarobj.add("/dev/null", arcname=stat['path'])
aae127d0
ERE
955 elif action == 'list':
956 stat = dpath.copy()
50f43227
ERE
957 path = self.unprefixed(ipath['path'])
958 stat['path'] = u'list://' + path
aae127d0 959 # unchanged files do not enter in the backup, only in the index
50f43227 960 self.logger.debug("[UNCHANGED] %s" % path)
80910564
TJ
961 else:
962 # should not happen
4bda6f45 963 self.logger.warning('unknown action in create_diff_backup: {0}'
80910564
TJ
964 ''.format(action))
965 stat = None
aae127d0 966
80910564
TJ
967 if stat:
968 # store the stat dict in the index
be60ffd0 969 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
aae127d0 970 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 971 index_sink.write(s)
aae127d0 972
be60ffd0 973 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
aae127d0 974 crc = binascii.crc32(s, crc) & 0xffffffff
3031b7ae 975 index_sink.write(s)
be60ffd0 976 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
3031b7ae 977 index_sink.write(s)
938c2d54 978
df86af81 979 index_it.release()
aae127d0
ERE
980 os.chdir(cwd)
981 tarobj.close()
938c2d54
PG
982 index_sink.close()
983
984
d07c8065 985 def iterate_index_path(self, index_path):
df86af81
ERE
986 '''
987 Returns an index iterator. Internally, it uses a classic iterator class.
988 We do that instead of just yielding so that the iterator object can have
989 an additional function to close the file descriptor that is opened in
990 the constructor.
991 '''
d07c8065 992
df86af81
ERE
993 class IndexPathIterator(object):
994 def __init__(self, delta_tar, index_path):
995 self.delta_tar = delta_tar
996 self.index_path = index_path
997 self.f = None
9eae9a1f 998 self.extra_data = dict()
df86af81 999 self.__enter__()
d07c8065 1000
df86af81
ERE
1001 def __iter__(self):
1002 return self
d07c8065 1003
df86af81
ERE
1004 def release(self):
1005 if self.f:
1006 self.f.close()
1007
1008 def __enter__(self):
1009 '''
1010 Allows this iterator to be used with the "with" statement
1011 '''
1012 if self.f is None:
9eccb1c2 1013 self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
df86af81
ERE
1014 # check index header
1015 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1016 if j.get("type", '') != 'python-delta-tar-index' or\
1017 j.get('version', -1) != 1:
1018 raise Exception("invalid index file format: %s" % json.dumps(j))
1019
9eae9a1f
ERE
1020 self.extra_data = j.get('extra_data', dict())
1021
df86af81
ERE
1022 # find BEGIN-FILE-LIST, ignore other headers
1023 while True:
1024 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1025 if j.get('type', '') == 'BEGIN-FILE-LIST':
1026 break
1027 return self
1028
1029 def __exit__(self, type, value, tb):
1030 '''
1031 Allows this iterator to be used with the "with" statement
1032 '''
ec57ce53
ERE
1033 if self.f:
1034 self.f.close()
df86af81 1035 self.f = None
d07c8065 1036
be60ffd0 1037 def __next__(self):
0349168a 1038 # read each file in the index and process it to do the restore
df86af81
ERE
1039 j = {}
1040 l_no = -1
1041 try:
1042 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
be60ffd0 1043 except Exception as e:
df86af81
ERE
1044 if self.f:
1045 self.f.close()
1046 raise e
d07c8065 1047
df86af81 1048 op_type = j.get('type', '')
d07c8065 1049
df86af81
ERE
1050 # when we detect the end of the list, break the loop
1051 if op_type == 'END-FILE-LIST':
1052 if self.f:
1053 self.f.close()
1054 raise StopIteration
1055
1056 # check input
1057 if op_type not in ['directory', 'file', 'link']:
4bda6f45 1058 self.delta_tar.logger.warning('unrecognized type to be '
df86af81
ERE
1059 'restored: %s, line %d' % (op_type, l_no))
1060 # iterate again
be60ffd0 1061 return self.__next__()
df86af81
ERE
1062
1063 return j, l_no
d07c8065 1064
df86af81 1065 return IndexPathIterator(self, index_path)
d07c8065 1066
26fdd428 1067 def iterate_tar_path(self, tar_path, new_volume_handler=None):
24ddf0a2
ERE
1068 '''
1069 Returns a tar iterator that iterates jsonized member items that contain
1070 an additional "member" field, used by RestoreHelper.
1071 '''
ec57ce53 1072 class TarPathIterator(object):
83a81852 1073 def __init__(self, delta_tar, tar_path, new_volume_handler=None):
24ddf0a2 1074 self.delta_tar = delta_tar
ec57ce53 1075 self.tar_path = tar_path
24ddf0a2 1076 self.tar_obj = None
6bca471c 1077 self.last_member = None
26fdd428 1078 self.new_volume_handler = new_volume_handler
24ddf0a2
ERE
1079 self.__enter__()
1080
1081 def __iter__(self):
1082 return self
1083
1084 def release(self):
1085 if self.tar_obj:
1086 self.tar_obj.close()
1087
1088 def __enter__(self):
1089 '''
1090 Allows this iterator to be used with the "with" statement
1091 '''
1092 if self.tar_obj is None:
d5e1d60f
PG
1093 decryptor = None
1094 if self.delta_tar.password is not None:
1f3fd7b0
PG
1095 decryptor = crypto.Decrypt \
1096 (password=self.delta_tar.password,
1097 key=self.delta_tar.crypto_key)
ec57ce53
ERE
1098 self.tar_obj = tarfile.TarFile.open(self.tar_path,
1099 mode='r' + self.delta_tar.mode,
1100 format=tarfile.GNU_FORMAT,
d1c38f40 1101 concat='#' in self.delta_tar.mode,
d5e1d60f 1102 encryption=decryptor,
83a81852 1103 new_volume_handler=self.new_volume_handler,
e2b59b34
ERE
1104 save_to_members=False,
1105 dereference=True)
24ddf0a2
ERE
1106 return self
1107
1108 def __exit__(self, type, value, tb):
1109 '''
1110 Allows this iterator to be used with the "with" statement
1111 '''
ec57ce53
ERE
1112 if self.tar_obj:
1113 self.tar_obj.close()
24ddf0a2
ERE
1114 self.tar_obj = None
1115
be60ffd0 1116 def __next__(self):
24ddf0a2
ERE
1117 '''
1118 Read each member and return it as a stat dict
1119 '''
be60ffd0 1120 tarinfo = self.tar_obj.__iter__().__next__()
8e019196
ERE
1121 # NOTE: here we compare if tarinfo.path is the same as before
1122 # instead of comparing the tarinfo object itself because the
1123 # object itself might change for multivol tarinfos
1124 if tarinfo is None or (self.last_member is not None and\
1125 self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
ec57ce53
ERE
1126 raise StopIteration
1127
6bca471c
ERE
1128 self.last_member = tarinfo
1129
24ddf0a2
ERE
1130 ptype = 'unknown'
1131 if tarinfo.isfile():
1132 ptype = 'file'
1133 elif tarinfo.isdir():
ab7e7465 1134 ptype = 'directory'
24ddf0a2
ERE
1135 elif tarinfo.islnk() or tarinfo.issym():
1136 ptype = 'link'
1137
1138 return {
1139 u'type': ptype,
1140 u'path': tarinfo.path,
1141 u'mode': tarinfo.mode,
1142 u'mtime': tarinfo.mtime,
1143 u'ctime': -1, # cannot restore
1144 u'uid': tarinfo.uid,
1145 u'gid': tarinfo.gid,
1146 u'inode': -1, # cannot restore
1147 u'size': tarinfo.size,
1148 u'member': tarinfo
ec57ce53
ERE
1149 }, 0
1150
26fdd428 1151 return TarPathIterator(self, tar_path, new_volume_handler)
24ddf0a2 1152
df99a044 1153 def jsonize_path_iterator(self, iter, strip=0):
d07c8065
ERE
1154 '''
1155 Converts the items yielded by an iterator into JSON path lines.
df99a044
ERE
1156
1157 strip: Strip the smallest prefix containing num leading slashes from
1158 the file path.
d07c8065
ERE
1159 '''
1160 while True:
1161 try:
be60ffd0 1162 path = iter.__next__()
df99a044 1163 if strip == 0:
4ac6d333 1164 yield self._stat_dict(path), 0
df99a044
ERE
1165 else:
1166 st = self._stat_dict(path)
1167 st['path'] = "/".join(path.split("/")[strip:])
4ac6d333 1168 yield st, 0
d07c8065
ERE
1169 except StopIteration:
1170 break
1171
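    # Sketch: wrapping _recursive_walk_dir('.') with jsonize_path_iterator
    # yields (stat_dict, 0) tuples; with strip=1 a walked path such as
    # "data/etc/passwd" is recorded as "etc/passwd".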
b84beea7
PG
1172 def iterate_disaster_index (self, index):
1173 """
1174 Mimic the behavior of the other object iterators, just with the inputs
1175 supplied directly as *index*.
1176 """
1177
1178 class RawIndexIterator(object):
1179 def __init__(self, delta_tar, tar_path, index):
1180 self.delta_tar = delta_tar
1181 self.index = index
1182 self.__enter__()
1183
1184 def __iter__(self):
1185 return self
1186
1187 def release(self):
1188 if self.tar_obj:
1189 self.tar_obj.close()
1190
1191 def __enter__(self):
1192 '''
1193 Allows this iterator to be used with the "with" statement
1194 '''
1195 self.iter = self.index.__iter__ ()
1196 return self
1197
1198 def __exit__(self, type, value, tb):
1199 '''
1200 Allows this iterator to be used with the "with" statement
1201 '''
1202
1203 def __next__(self):
1204 idxent = self.iter.__next__ ()
1205 return idxent
1206
1207 return RawIndexIterator(self, index)
1208
d07c8065
ERE
1209 def collate_iterators(self, it1, it2):
1210 '''
1211 Collate two iterators, so that it returns pairs of the items of each
1212 iterator (if the items are the same), or (None, elem2) or (elem1, None)
1213 when there's no match for the items in the other iterator.
1214
1215 It assumes that the items in both lists are ordered in the same way.
1216 '''
ea6d3c3e 1217 l_no = 0
d07c8065
ERE
1218 elem1, elem2 = None, None
1219 while True:
1220 if not elem1:
1221 try:
be60ffd0 1222 elem1, l_no = it1.__next__()
d07c8065
ERE
1223 except StopIteration:
1224 if elem2:
ea6d3c3e 1225 yield (None, elem2, l_no)
d07c8065 1226 for elem2 in it2:
ea6d3c3e
ERE
1227 if isinstance(elem2, tuple):
1228 elem2 = elem2[0]
1229 yield (None, elem2, l_no)
d07c8065 1230 break
d07c8065
ERE
1231 if not elem2:
1232 try:
be60ffd0 1233 elem2 = it2.__next__()
d07c8065
ERE
1234 if isinstance(elem2, tuple):
1235 elem2 = elem2[0]
1236 except StopIteration:
1237 if elem1:
ea6d3c3e 1238 yield (elem1, None, l_no)
df99a044 1239 for elem1, l_no in it1:
ea6d3c3e 1240 yield (elem1, None, l_no)
d07c8065 1241 break
670f9934
ERE
1242
1243 index1 = self.unprefixed(elem1['path'])
1244 index2 = self.unprefixed(elem2['path'])
1245 i1, i2 = self.compare_indexes(index1, index2)
1246
1247 yield1 = yield2 = None
1248 if i1 is not None:
1249 yield1 = elem1
1250 elem1 = None
1251 if i2 is not None:
1252 yield2 = elem2
1253 elem2 = None
1254 yield (yield1, yield2, l_no)
1255
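    # Collation sketch: given two already-sorted streams yielding paths
    #
    #     it1: a, b, d        it2: a, c, d
    #
    # collate_iterators yields (a, a), (b, None), (None, c), (d, d), each
    # together with the current index line number.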
1256 def compare_indexes(self, index1, index2):
1257 '''
1258 Compare iterator indexes and return a tuple in the following form:
1259 if index1 < index2, returns (index1, None)
1260 if index1 == index2 returns (index1, index2)
1261 else: returns (None, index2)
1262 '''
1263 l1 = index1.split('/')
1264 l2 = index2.split('/')
1265 length = len(l2) - len(l1)
1266
1267 if length > 0:
1268 return (index1, None)
1269 elif length < 0:
1270 return (None, index2)
1271
1272 for i1, i2 in zip(l1, l2):
1273 if i1 < i2:
1274 return (index1, None)
1275 elif i1 > i2:
1276 return (None, index2)
1277
1278 return (index1, index2)
0708a374 1279
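    # Ordering sketch: paths are compared component-wise and shorter paths
    # rank first:
    #
    #     compare_indexes('etc', 'etc/passwd')  -> ('etc', None)
    #     compare_indexes('etc/a', 'etc/b')     -> ('etc/a', None)
    #     compare_indexes('etc/b', 'etc/b')     -> ('etc/b', 'etc/b')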
8c65a2b1 1280 def list_backup(self, backup_tar_path, list_func=None):
be60ffd0 1281 if not isinstance(backup_tar_path, str):
8c65a2b1
ERE
1282 raise Exception('Backup tar path must be a string')
1283
1284 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1285 raise Exception('Source path "%s" does not exist or is not a '\
1286 'file' % backup_tar_path)
1287
1288 if not os.access(backup_tar_path, os.R_OK):
1289 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1290
1291 cwd = os.getcwd()
1292
b7c47f38 1293 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
8c65a2b1
ERE
1294 '''
1295 Handles the new volumes
1296 '''
1297 volume_name = deltarobj.volume_name_func(backup_path, True,
1298 volume_number, guess_name=True)
1299 volume_path = os.path.join(backup_path, volume_name)
1300
1301 # we convert relative paths into absolute because CWD is changed
1302 if not os.path.isabs(volume_path):
1303 volume_path = os.path.join(cwd, volume_path)
b7c47f38
PG
1304 tarobj.open_volume(volume_path, encryption=encryption)
1305
774ca538
PG
1306 if self.decryptor is None:
1307 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
8c65a2b1
ERE
1308
1309 backup_path = os.path.dirname(backup_tar_path)
1310 if not os.path.isabs(backup_path):
1311 backup_path = os.path.join(cwd, backup_path)
133d30da 1312 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)
b7a6566b 1313
8c65a2b1
ERE
1314 tarobj = tarfile.TarFile.open(backup_tar_path,
1315 mode='r' + self.mode,
1316 format=tarfile.GNU_FORMAT,
d1c38f40 1317 concat='#' in self.mode,
133d30da 1318 encryption=self.decryptor,
ea625b04 1319 new_volume_handler=new_volume_handler,
e2b59b34
ERE
1320 save_to_members=False,
1321 dereference=True)
8c65a2b1
ERE
1322
1323 def filter(cls, list_func, tarinfo):
1324 if list_func is None:
b008f989 1325 self.logger.info(tarinfo.path)
8c65a2b1
ERE
1326 else:
1327 list_func(tarinfo)
1328 return False
1329 filter = partial(filter, self, list_func)
1330
1331 tarobj.extractall(filter=filter)
1332 tarobj.close()
1333
0708a374 1334 def restore_backup(self, target_path, backup_indexes_paths=[],
e93f83f1 1335 backup_tar_path=None, restore_callback=None,
b84beea7 1336 disaster=tarfile.TOLERANCE_STRICT, backup_index=None):
0708a374
ERE
1337 '''
1338 Restores a backup.
1339
1340 Parameters:
0708a374
ERE
1341 - target_path: path to restore.
1342 - backup_indexes_paths: path to backup indexes, in descending date order.
1343 The indexes indicate the location of their respective backup volumes,
1344 and multiple indexes are needed to be able to restore diff backups.
1345 Note that this is an optional parameter: if not supplied, it will
1346 try to restore directly from backup_tar_path.
1347 - backup_tar_path: path to the backup tar file. Used as an alternative
1348 to backup_indexes_paths to restore directly from a tar file without
1349 using any file index. If it's a multivol tarfile, volume_name_func
1350 will be called.
4da27cfe 1351 - restore_callback: callback function to be called during restore.
b0aef801 1352 This is passed to the helper and gets called for every file.
11684b1d 1353
3a7e1a50 1354 NOTE: If you want to use an index to restore a backup, this function
11684b1d
ERE
1355 only supports doing so when the tarfile mode is either uncompressed or
1356 uses concat compression mode, because otherwise it would be very slow.
3a7e1a50
ERE
1357
1358 NOTE: Indices are assumed to follow the same format as the index_mode
1359 specified in the constructor.
e93f83f1
PG
1360
1361 Returns the list of files that could not be restored, if there were
1362 any.
0708a374 1363 '''
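        # Restore sketch (hypothetical paths); indexes are passed newest
        # first:
        #
        #     failed = dtar.restore_backup('/srv/restore',
        #         backup_indexes_paths=['bdiff-2014-02-06-1630.index.gz',
        #                               'bfull-2014-02-05-1630.index.gz'])
        #
        # An empty 'failed' list means every file was restored.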
11684b1d 1364 # check/sanitize input
be60ffd0 1365 if not isinstance(target_path, str):
e5c6ca04
ERE
1366 raise Exception('Target path must be a string')
1367
11684b1d
ERE
1368 if not backup_indexes_paths and backup_tar_path is None and backup_index is None:
1369 raise Exception("You have to either provide index paths or a tar path")
e5c6ca04 1370
b84beea7
PG
1371 if isinstance (backup_index, list) is True:
1372 mode = "disaster"
1373 elif len(backup_indexes_paths) == 0:
ea6d3c3e
ERE
1374 mode = "tar"
1375 else:
1376 mode = "diff"
1377
1378 if mode == "tar":
be60ffd0 1379 if not isinstance(backup_tar_path, str):
11684b1d
ERE
1380 raise Exception('Backup tar path must be a string')
1381
1382 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1383 raise Exception('Source path "%s" does not exist or is not a '\
1384 'file' % backup_tar_path)
1385
1386 if not os.access(backup_tar_path, os.R_OK):
1387 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1388 else:
1389 if not isinstance(backup_indexes_paths, list):
1390 raise Exception('backup_indexes_paths must be a list')
1391
1392 if self.mode.startswith(':') or self.mode.startswith('|'):
1393 raise Exception('Restore only supports either uncompressed tars'
1394 ' or concat compression when restoring from an index, and '
1395 ' the open mode you provided is "%s"' % self.mode)
1396
1397 for index in backup_indexes_paths:
be60ffd0 1398 if not isinstance(index, str):
11684b1d 1399 raise Exception('indices must be strings')
e5c6ca04 1400
11684b1d
ERE
1401 if not os.path.exists(index) or not os.path.isfile(index):
1402 raise Exception('Index path "%s" does not exist or is not a '\
1403 'file' % index)
1404
1405 if not os.access(index, os.R_OK):
1406 raise Exception('Index path "%s" is not readable' % index)
e5c6ca04
ERE
1407
1408 # try to create backup path if needed
1409 if not os.path.exists(target_path):
1410 os.makedirs(target_path)
1411
ec57ce53
ERE
1412 # make backup_tar_path absolute so that iterate_tar_path works fine
1413 if backup_tar_path and not os.path.isabs(backup_tar_path):
1414 backup_tar_path = os.path.abspath(backup_tar_path)
1415
d5361dac 1416 cwd = os.getcwd()
ec57ce53 1417 os.chdir(target_path)
d5361dac 1418
2ae46844 1419 # setup for decrypting payload
774ca538
PG
1420 if self.decryptor is None:
1421 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
2ae46844 1422
ea6d3c3e 1423 if mode == 'tar':
24ddf0a2
ERE
1424 index_it = self.iterate_tar_path(backup_tar_path)
1425 helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
ec57ce53 1426 tarobj=index_it.tar_obj)
ea6d3c3e 1427 elif mode == "diff":
04f4c7ab
PG
1428 helper = RestoreHelper(self, cwd, backup_indexes_paths,
1429 disaster=disaster)
f3d10816
PG
1430 try:
1431 # get iterator from newest index at _data[0]
1432 index1 = helper._data[0]["path"]
1433 index_it = self.iterate_index_path(index1)
1434 except tarfile.DecryptionError as exn:
1435 self.logger.error("failed to decrypt file [%s]: %s; is this an "
afc87ebc
PG
1436 "actual encrypted index file?"
1437 % (index1, str (exn)))
1438 return [(index1, exn)]
1439 except Exception as exn:
1440 # compressed files
1441 self.logger.error("failed to read file [%s]: %s; is this an "
1442 "actual index file?" % (index1, str (exn)))
f3d10816 1443 return [(index1, exn)]
b84beea7
PG
1444 elif mode == "disaster":
1445 index_it = self.iterate_disaster_index (backup_index)
1446 helper = RestoreHelper (self, cwd, disaster=disaster)
1447
d07c8065 1448
24ddf0a2
ERE
1449 dir_it = self._recursive_walk_dir('.')
1450 dir_path_it = self.jsonize_path_iterator(dir_it)
11684b1d 1451
e93f83f1
PG
1452 failed = [] # irrecoverable files
1453
a395759e 1454 # for each file to be restored, do:
24ddf0a2
ERE
1455 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
1456 if not ipath:
1457 upath = dpath['path']
1458 op_type = dpath['type']
1459 else:
1460 upath = self.unprefixed(ipath['path'])
1461 op_type = ipath['type']
42c04ead 1462
24ddf0a2 1463 # filter paths
75059f3c 1464 if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
24ddf0a2 1465 continue
ea6d3c3e 1466
24ddf0a2
ERE
1467 # if types of the file mismatch, the file needs to be deleted
1468 # and re-restored
1469 if ipath is not None and dpath is not None and\
1470 dpath['type'] != ipath['type']:
1471 helper.delete(upath)
1472
1473 # if file not found in dpath, we can directly restore from index
1474 if not dpath:
1475 # if the file doesn't exist and it needs to be deleted, it
1476 # means that work is already done
1477 if ipath['path'].startswith('delete://'):
ea6d3c3e 1478 continue
24ddf0a2 1479 try:
b008f989 1480 self.logger.debug("restore %s" % ipath['path'])
4da27cfe 1481 helper.restore(ipath, l_no, restore_callback)
be60ffd0 1482 except Exception as e:
e93f83f1 1483 iipath = ipath.get ("path", "")
7b07645e 1484 self.logger.error("FAILED to restore: {} ({})"
e93f83f1 1485 .format(iipath, e))
04f4c7ab 1486 if disaster != tarfile.TOLERANCE_STRICT:
e93f83f1 1487 failed.append ((iipath, e))
24ddf0a2 1488 continue
11684b1d 1489
24ddf0a2
ERE
1490 # if both files are equal, we have nothing to restore
1491 if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
1492 continue
1493
1494 # we have to restore the file, but first we need to delete the
1495 # current existing file.
1496 # we don't delete the file if it's a directory, because it might
1497 # just have changed mtime, so it's quite inefficient to remove
1498 # it
1499 if ipath:
1500 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
42c04ead 1501 helper.delete(upath)
b008f989 1502 self.logger.debug("restore %s" % ipath['path'])
e93f83f1
PG
1503 try:
1504 helper.restore(ipath, l_no, restore_callback)
1505 except Exception as e:
04f4c7ab 1506 if disaster == tarfile.TOLERANCE_STRICT:
e93f83f1
PG
1507 raise
1508 failed.append ((ipath.get ("path", ""), e))
1509 continue
24ddf0a2
ERE
1510
1511 # if the file is not in the index (so it comes from the target
1512 # directory) then we have to delete it
1513 else:
c9d47a03 1514 self.logger.debug("delete %s" % upath)
24ddf0a2 1515 helper.delete(upath)
42c04ead 1516
ec57ce53
ERE
1517 helper.restore_directories_permissions()
1518 index_it.release()
1519 os.chdir(cwd)
1520 helper.cleanup()
ea6d3c3e 1521
e93f83f1
PG
1522 return failed
1523
1524
1525 def recover_backup(self, target_path, backup_indexes_paths=[],
1526 restore_callback=None):
1527 """
1528 Walk the index, extracting objects in disaster mode. Bad files are
1529 reported along with a reason.
1530 """
1531 return self.restore_backup(target_path,
1532 backup_indexes_paths=backup_indexes_paths,
04f4c7ab
PG
1533 disaster=tarfile.TOLERANCE_RECOVER)
1534
1535
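    # Illustrative usage sketch (assumption: ``deltatar`` is a configured
    # DeltaTar instance; the target and index paths are hypothetical). In
    # recover mode bad members are skipped and reported instead of aborting
    # the whole restore:
    #
    #     failures = deltatar.recover_backup("/srv/restore",
    #                                        backup_indexes_paths=["backup/index.gz"])
    #     for path, exc in failures:
    #         print("could not recover %s: %s" % (path, exc))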
6690f5e0 1536 def rescue_backup(self, target_path, backup_tar_path,
04f4c7ab
PG
1537 restore_callback=None):
1538 """
1539 More aggressive “unfsck” mode: do not rely on the index data as the
1540 files may be corrupt; skim files for header-like information and
1541 attempt to retrieve the data.
1542 """
b84beea7
PG
1543 backup_index = tarfile.gen_rescue_index(backup_tar_path,
1544 self.mode,
1545 password=self.password,
1546 key=self.crypto_key)
6690f5e0 1547
04f4c7ab 1548 return self.restore_backup(target_path,
b84beea7 1549 backup_index=backup_index,
04f4c7ab 1550 disaster=tarfile.TOLERANCE_RESCUE)
e93f83f1
PG
1551
1552
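    # Illustrative sketch (assumption: ``deltatar`` is a configured DeltaTar
    # instance; paths are hypothetical). Rescue mode ignores the on-disk
    # index entirely and rebuilds one by scanning the archive for plausible
    # headers before restoring with rescue tolerance:
    #
    #     failures = deltatar.rescue_backup("/srv/restore",
    #                                       "backup/backup-full.tar.gz")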
11684b1d
ERE
1553 def _parse_json_line(self, f, l_no):
1554 '''
ee0e095f 1555 Read a line from a file-like object and process it as JSON.
11684b1d
ERE
1556 '''
1557 l = f.readline()
1558 l_no += 1
1559 try:
be60ffd0 1560 j = json.loads(l.decode('UTF-8'))
ee0e095f
PG
1561 except UnicodeDecodeError as e:
1562 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1563 raise Exception \
1564 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1565 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1566 from e
1567 raise Exception \
1568 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1569 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1570 from e
be60ffd0 1571 except ValueError as e:
11684b1d
ERE
1572 raise Exception("error parsing this json line "
1573 "(line number %d): %s" % (l_no, l))
1574 return j, l_no
ea6d3c3e 1575
24ddf0a2 1576
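# Illustrative sketch (assumption; the exact field set of a real index may
# differ): each line handed to _parse_json_line() is a standalone JSON object
# carrying at least the fields used elsewhere in this module, e.g.
#
#     {"type": "file", "path": "snapshot://srv/data/report.txt",
#      "volume": 0, "offset": 1536}
#
# The path prefix ("snapshot://", "list://" or "delete://") tells the restore
# logic how the entry has to be treated.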
ea6d3c3e
ERE
1577class RestoreHelper(object):
1578 '''
1579 Class used to help to restore files from indices
1580 '''
1581
1582 # holds the dicts of data
1583 _data = []
1584
1585 _deltatar = None
1586
1587 _cwd = None
1588
0501fe0a
ERE
1589 # list of directories to be restored. This is done as a last step, see
1590 # tarfile.extractall for details.
1591 _directories = []
1592
04f4c7ab 1593 _disaster = tarfile.TOLERANCE_STRICT
e93f83f1 1594
037994ca 1595 def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
04f4c7ab 1596 tarobj=None, disaster=tarfile.TOLERANCE_STRICT):
ea6d3c3e
ERE
1597 '''
1598 Constructor opens the tars and initializes the data structures.
1599
037994ca
PG
1600 Assumptions:
1601
1602 - Index list must be provided in reverse order (newer first).
1603 - “newer first” apparently means that if there are n backups
1604 provided, the last full backup is at index n-1 and the most recent
1605 diff backup is at index 0.
1606 - Only the first, the second, and the last elements of
1607 ``index_list`` are relevant, others will not be accessed.
1608 - If no ``index_list`` is provided, both ``tarobj`` and
1609 ``backup_path`` must be passed.
1610 - If ``index_list`` is provided, the values of ``tarobj`` and
1611 ``backup_path`` are ignored.
ea6d3c3e
ERE
1612 '''
1613 self._data = []
0501fe0a 1614 self._directories = []
ea6d3c3e
ERE
1615 self._deltatar = deltatar
1616 self._cwd = cwd
3031b7ae 1617 self._password = deltatar.password
1f3fd7b0 1618 self._crypto_key = deltatar.crypto_key
3031b7ae 1619 self._decryptors = []
e93f83f1 1620 self._disaster = disaster
ea6d3c3e 1621
253d4cdd
ERE
1622 try:
1623 import grp, pwd
1624 except ImportError:
1625 grp = pwd = None
1626
1627 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1628 self.canchown = True
1629 else:
1630 self.canchown = False
1631
037994ca 1632 if index_list is not None:
24ddf0a2 1633 for index in index_list:
037994ca 1634 is_full = index == index_list[-1]
24ddf0a2 1635
d5e1d60f 1636 decryptor = None
3031b7ae 1637 if self._password is not None:
1f3fd7b0
PG
1638 decryptor = crypto.Decrypt (password=self._password,
1639 key=self._crypto_key)
d5e1d60f 1640
24ddf0a2
ERE
1641 # make paths absolute to avoid cwd problems
1642 if not os.path.isabs(index):
1643 index = os.path.normpath(os.path.join(cwd, index))
1644
1645 s = dict(
1646 curr_vol_no = None,
1647 vol_fd = None,
1648 offset = -1,
1649 tarobj = None,
1650 path = index,
1651 is_full = is_full,
1652 iterator = None,
1653 last_itelement = None,
1654 last_lno = 0,
1655 new_volume_handler = partial(self.new_volume_handler,
1656 self._deltatar, self._cwd, is_full,
d5e1d60f
PG
1657 os.path.dirname(index), decryptor),
1658 decryptor = decryptor
24ddf0a2
ERE
1659 )
1660 self._data.append(s)
1661 else:
ea6d3c3e 1662 # make paths absolute to avoid cwd problems
24ddf0a2
ERE
1663 if not os.path.isabs(backup_path):
1664 backup_path = os.path.normpath(os.path.join(cwd, backup_path))
ea6d3c3e 1665
ec57ce53
ERE
1666 # update the new_volume_handler of tar_obj
1667 tarobj.new_volume_handler = partial(self.new_volume_handler,
b7c47f38 1668 self._deltatar, self._cwd, True, os.path.dirname(backup_path),
133d30da 1669 self._deltatar.decryptor)
ea6d3c3e
ERE
1670 s = dict(
1671 curr_vol_no = None,
1672 vol_fd = None,
1673 offset = -1,
24ddf0a2
ERE
1674 tarobj = tarobj,
1675 path = backup_path,
1676 is_full = True,
670f9934
ERE
1677 iterator = None,
1678 last_itelement = None,
1679 last_lno = 0,
d5e1d60f
PG
1680 new_volume_handler = tarobj.new_volume_handler,
1681 decryptor = self._deltatar.decryptor
ea6d3c3e
ERE
1682 )
1683 self._data.append(s)
1684
3031b7ae 1685
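    # Illustrative sketch (hypothetical paths): with one full and one diff
    # backup, ``index_list`` is ordered newest first, so ``_data[0]`` refers
    # to the diff index and ``_data[-1]`` to the full one:
    #
    #     helper = RestoreHelper(deltatar, cwd="/srv/restore",
    #                            index_list=["backup-diff/index.gz",
    #                                        "backup-full/index.gz"])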
ea6d3c3e
ERE
1686 def cleanup(self):
1687 '''
1688 Closes all open files
1689 '''
1690 for data in self._data:
55b2ffd0
ERE
1691 if data['vol_fd']:
1692 data['vol_fd'].close()
1693 data['vol_fd'] = None
ea6d3c3e
ERE
1694 if data['tarobj']:
1695 data['tarobj'].close()
1696 data['tarobj'] = None
ea6d3c3e
ERE
1697
1698 def delete(self, path):
1699 '''
1700 Delete a file
1701 '''
df99a044
ERE
1702 if not os.path.exists(path):
1703 return
1704
24ddf0a2 1705 # to preserve parent directory mtime, we save it
283fbd5e 1706 parent_dir = os.path.dirname(path) or os.getcwd()
24ddf0a2
ERE
1707 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1708
561bc39f 1709 if os.path.isdir(path) and not os.path.islink(path):
ea6d3c3e
ERE
1710 shutil.rmtree(path)
1711 else:
1712 os.unlink(path)
1713
24ddf0a2
ERE
1714 # now we restore parent_directory mtime
1715 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1716
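    # Illustrative standalone sketch of the pattern used above (hypothetical
    # path): record the parent's mtime, perform the deletion, then put the
    # mtime back so the parent directory does not look modified:
    #
    #     parent = os.path.dirname("/srv/restore/some/file") or os.getcwd()
    #     mtime = int(os.stat(parent).st_mtime)
    #     os.unlink("/srv/restore/some/file")
    #     os.utime(parent, (mtime, mtime))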
4da27cfe 1717 def restore(self, itpath, l_no, callback=None):
ea6d3c3e 1718 '''
8a54d5dd 1719 Restore the path from the appropriate backup. Receives the current path
e8d95fe5 1720 from the newest (=first) index iterator. itpath must be not null.
b0aef801 1721 callback is a custom function that gets called for every file.
037994ca
PG
1722
1723 NB: This function takes the attribute ``_data`` as input but will only
1724 ever use its first and, if available, second element. Anything else in
1725 ``._data[]`` will be ignored.
ea6d3c3e 1726 '''
ea6d3c3e
ERE
1727 path = itpath['path']
1728
4da27cfe
SA
1729 # Calls the callback function
1730 if callback:
1731 callback()
1732
ea6d3c3e 1733 if path.startswith('delete://'):
df86af81
ERE
1734 # the file has already been deleted in restore_backup in all cases,
1735 # so there is nothing left to do here
ea6d3c3e 1736 return
df86af81 1737
e8d95fe5 1738 # get data from newest index (_data[0])
df86af81
ERE
1739 data = self._data[0]
1740 upath = self._deltatar.unprefixed(path)
1741
24ddf0a2 1742 # to preserve parent directory mtime, we save it
283fbd5e 1743 parent_dir = os.path.dirname(upath) or os.getcwd()
ec57ce53
ERE
1744 if not os.path.exists(parent_dir):
1745 os.makedirs(parent_dir)
24ddf0a2
ERE
1746 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1747
e8d95fe5 1748 # if the path is marked in the newest index as a snapshot, deal with it
df86af81
ERE
1749 # and finish
1750 if path.startswith('snapshot://'):
e93f83f1
PG
1751 try:
1752 self.restore_file(itpath, data, path, l_no, upath)
1753 except Exception:
1754 raise
24ddf0a2
ERE
1755
1756 # now we restore parent_directory mtime
1757 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
ea6d3c3e
ERE
1758 return
1759
1760 # we go from index to index, finding the path in the index, then finding
1761 # the index with the most recent snapshot of the file being restored
e8d95fe5
TJ
1762 #
1763 # Right now we support diff backups only, not incremental backups.
1764 # As a result _data[0] is always the diff backup index
1765 # and _data[1] the full backup index.
527670c4 1766 if len(self._data) == 2:
7273719c 1767 data = self._data[1]
527670c4
TJ
1768 d, l_no, dpath = self.find_path_in_index(data, upath)
1769 if not d:
1770 self._deltatar.logger.warning('Error restoring file %s from '
1771 'index, not found in index %s' % (path, data['path']))
1772 return
1773
1774 cur_path = d.get('path', '')
1775 if cur_path.startswith('delete://'):
1776 self._deltatar.logger.warning(('Strange thing happened, file '
1777 '%s was listed in first index but deleted by another '
1778 'one. Path was ignored and untouched.') % path)
1779 return
1780 elif cur_path.startswith('snapshot://'):
1781 # this code path is reached when the file is unchanged
1782 # in the newest index and therefore of type 'list://'
1783 self.restore_file(d, data, path, l_no, dpath)
1784
1785 # now we restore parent_directory mtime
1786 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1787 return
1788
1789 # error code path is reached when:
1790 # a) we have more than two indexes (unsupported atm)
1791 # b) both indexes contain a list:// entry (logic error)
1792 # c) we have just one index and it also contains list://
4bda6f45 1793 self._deltatar.logger.warning(('Error restoring file %s from index, '
ea6d3c3e
ERE
1794 'snapshot not found in any index') % path)
1795
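    # Summary of the decision logic above (derived from this method, not a
    # separate specification): for a path taken from the newest index,
    #
    #     delete://...   -> nothing to do, the deletion already happened
    #     snapshot://... -> restore the content directly from _data[0]
    #     list://...     -> unchanged in the diff backup; look the path up in
    #                       the older index (_data[1]) and restore from there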
670f9934
ERE
1796 def find_path_in_index(self, data, upath):
1797 # NOTE: we restart the iterator sometimes because the iterator can be
1798 # walked over completely multiple times, for example if one path is not
1799 # found in one index and we have to go to the next index.
7273719c
PG
1800 it = data['iterator']
1801 if it is None:
670f9934 1802 it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
be60ffd0 1803 d, l_no = it.__next__()
670f9934 1804 else:
670f9934
ERE
1805 d = data['last_itelement']
1806 l_no = data['last_lno']
1807
670f9934 1808 while True:
7273719c 1809 dpath = self._deltatar.unprefixed(d.get('path', ''))
670f9934
ERE
1810 if upath == dpath:
1811 data['last_itelement'] = d
1812 data['last_lno'] = l_no
1813 return d, l_no, dpath
1814
1815 up, dp = self._deltatar.compare_indexes(upath, dpath)
1816 # whenever upath should have appeared before the current dpath, it means
1817 # upath is just not in this index and we should stop
1818 if dp is None:
1819 data['last_itelement'] = d
1820 data['last_lno'] = l_no
1821 return None, 0, ''
1822
1823 try:
be60ffd0 1824 d, l_no = it.__next__()
670f9934
ERE
1825 except StopIteration:
1826 data['last_itelement'] = d
1827 data['last_lno'] = l_no
1828 return None, 0, ''
670f9934 1829
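    # Illustrative note (assumption: lookups arrive in the same sorted order
    # as the index itself): storing ``last_itelement``/``last_lno`` lets each
    # call resume where the previous one stopped instead of rescanning, e.g.
    #
    #     d, l_no, dpath = helper.find_path_in_index(data, "srv/data/a.txt")
    #     d, l_no, dpath = helper.find_path_in_index(data, "srv/data/b.txt")  # resumes after a.txt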
0501fe0a
ERE
1830 def restore_directories_permissions(self):
1831 '''
1832 Restore directory permissions when everything have been restored
1833 '''
42c04ead
ERE
1834 try:
1835 import grp, pwd
1836 except ImportError:
1837 grp = pwd = None
1838
0501fe0a
ERE
1839 self._directories.sort(key=operator.attrgetter('name'))
1840 self._directories.reverse()
0501fe0a
ERE
1841
1842 # Set correct owner, mtime and filemode on directories.
1843 for member in self._directories:
1844 dirpath = member.name
1845 try:
42c04ead
ERE
1846 os.chmod(dirpath, member.mode)
1847 os.utime(dirpath, (member.mtime, member.mtime))
253d4cdd 1848 if self.canchown:
42c04ead
ERE
1849 # We have to be root to do so.
1850 try:
1851 g = grp.getgrnam(member.gname)[2]
1852 except KeyError:
1853 g = member.gid
1854 try:
1855 u = pwd.getpwnam(member.uname)[2]
1856 except KeyError:
1857 u = member.uid
1858 try:
4e433e00 1859 if member.issym and hasattr(os, "lchown"):
42c04ead
ERE
1860 os.lchown(dirpath, u, g)
1861 else:
1862 os.chown(dirpath, u, g)
1863 except EnvironmentError:
1864 raise tarfile.ExtractError("could not change owner")
1865
be60ffd0 1866 except tarfile.ExtractError as e:
4bda6f45 1867 self._deltatar.logger.warning('tarfile: %s' % e)
0501fe0a 1868
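    # Illustrative note: sorting the collected directories by name and then
    # reversing yields a deepest-first order, so children are finalized before
    # their parents, e.g.
    #
    #     ["a", "a/b", "a/b/c"]  ->  ["a/b/c", "a/b", "a"]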
df86af81 1869 @staticmethod
b7c47f38 1870 def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
ea6d3c3e
ERE
1871 '''
1872 Handles the new volumes
1873 '''
df86af81
ERE
1874 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1875 volume_number, guess_name=True)
ea6d3c3e
ERE
1876 volume_path = os.path.join(backup_path, volume_name)
1877
1878 # we convert relative paths into absolute because CWD is changed
1879 if not os.path.isabs(volume_path):
1880 volume_path = os.path.join(cwd, volume_path)
b7c47f38 1881 tarobj.open_volume(volume_path, encryption=encryption)
ea6d3c3e 1882
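    # Illustrative sketch (names as used above; the volume layout is
    # hypothetical): when extraction hits the end of a volume, the handler
    # maps the next volume number to a file next to the backup and reopens
    # the archive there:
    #
    #     name = deltarobj.volume_name_func("backup-full", True, 2, guess_name=True)
    #     tarobj.open_volume(os.path.join("backup-full", name), encryption=encryption)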
253d4cdd 1883 def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
ea6d3c3e
ERE
1884 '''
1885 Restores a snapshot of a file from a specific backup
1886 '''
ea6d3c3e 1887 op_type = file_data.get('type', -1)
24ddf0a2 1888 member = file_data.get('member', None)
9f9ae874 1889 ismember = bool(member)
24ddf0a2
ERE
1890
1891 # when member is set, we can assume everything is right and we
1892 # just have to restore the path
a2a37de7 1893 if member is None:
24ddf0a2
ERE
1894 vol_no = file_data.get('volume', -1)
1895 # sanity check
1896 if not isinstance(vol_no, int) or vol_no < 0:
4bda6f45 1897 self._deltatar.logger.warning('unrecognized type to be restored: '
24ddf0a2
ERE
1898 '%s, line %d' % (op_type, l_no))
1899
1900 # setup the volume that needs to be read. only needed when member is
1901 # not set
a2a37de7 1902 if index_data['curr_vol_no'] != vol_no:
24ddf0a2
ERE
1903 index_data['curr_vol_no'] = vol_no
1904 backup_path = os.path.dirname(index_data['path'])
1905 vol_name = self._deltatar.volume_name_func(backup_path,
1906 index_data['is_full'], vol_no, guess_name=True)
1907 vol_path = os.path.join(backup_path, vol_name)
1908 if index_data['vol_fd']:
1909 index_data['vol_fd'].close()
be60ffd0 1910 index_data['vol_fd'] = open(vol_path, 'rb')
24ddf0a2
ERE
1911
1912 # force reopen of the tarobj because of new volume
1913 if index_data['tarobj']:
1914 index_data['tarobj'].close()
1915 index_data['tarobj'] = None
1916
1917 # seek tarfile if needed
1918 offset = file_data.get('offset', -1)
ea6d3c3e 1919 if index_data['tarobj']:
c6226e2a
PG
1920 try:
1921 member = index_data['tarobj'].__iter__().__next__()
e93f83f1
PG
1922 except tarfile.DecryptionError:
1923 pass
1924 except tarfile.CompressionError:
1925 pass
1926
24ddf0a2
ERE
1927 if not member or member.path != file_data['path']:
1928 # force a seek and reopen
1929 index_data['tarobj'].close()
1930 index_data['tarobj'] = None
1931
1932 # open the tarfile if needed
1933 if not index_data['tarobj']:
1934 index_data['vol_fd'].seek(offset)
1935 index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
1936 fileobj=index_data['vol_fd'],
1937 format=tarfile.GNU_FORMAT,
d1c38f40 1938 concat='#' in self._deltatar.mode,
d5e1d60f 1939 encryption=index_data["decryptor"],
253d4cdd 1940 new_volume_handler=index_data['new_volume_handler'],
044585c6 1941 save_to_members=False,
04f4c7ab 1942 tolerance=self._disaster)
24ddf0a2 1943
be60ffd0 1944 member = index_data['tarobj'].__iter__().__next__()
ea6d3c3e 1945
253d4cdd
ERE
1946 member.path = unprefixed_path
1947 member.name = unprefixed_path
0501fe0a
ERE
1948
1949 if op_type == 'directory':
253d4cdd 1950 self.add_member_dir(member)
0501fe0a 1951 member = copy.copy(member)
be60ffd0 1952 member.mode = 0o0700
0501fe0a 1953
df86af81
ERE
1954 # if it's an existing directory, we don't need to recreate it;
1955 # we just set the right permissions and mtime
1956 if os.path.exists(member.path):
1957 return
1958
9f9ae874 1959 if not ismember:
24ddf0a2
ERE
1960 # set current volume number in tarobj, otherwise the extraction of the
1961 # file might fail when trying to extract a multivolume member
1962 index_data['tarobj'].volume_number = index_data['curr_vol_no']
86a6e741 1963
9b13f5c4
PG
1964 def ignore_symlink (member, *_args):
1965 self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
786addd6 1966
ea6d3c3e 1967 # finally, restore the file
9b13f5c4 1968 index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)
253d4cdd
ERE
1969
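    # Illustrative sketch (variable names are assumptions; the keyword
    # arguments mirror the call in restore_file above): instead of scanning a
    # volume from the start, the raw file descriptor is seeked to the member's
    # recorded offset and a fresh tarfile is opened right there:
    #
    #     vol_fd.seek(offset)
    #     tarobj = tarfile.open(mode="r" + deltatar.mode, fileobj=vol_fd,
    #                           format=tarfile.GNU_FORMAT,
    #                           concat='#' in deltatar.mode,
    #                           save_to_members=False)
    #     member = tarobj.__iter__().__next__()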
1970 def add_member_dir(self, member):
1971 '''
1972 Add member dir to be restored at the end
1973 '''
4e433e00 1974 if not self.canchown:
253d4cdd
ERE
1975 self._directories.append(DirItem(name=member.name, mode=member.mode,
1976 mtime=member.mtime))
1977 else:
1978 self._directories.append(DirItem(name=member.name, mode=member.mode,
1979 mtime=member.mtime, gname=member.gname, uname=member.uname,
4e433e00 1980 uid=member.uid, gid=member.gid, issym=member.issym()))
253d4cdd
ERE
1981
1982class DirItem(object):
1983 def __init__(self, **kwargs):
be60ffd0 1984 for k, v in kwargs.items():
9f9ae874 1985 setattr(self, k, v)