3 # Copyright (C) 2013, 2014 Intra2net AG
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published
7 # by the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see
17 # <http://www.gnu.org/licenses/lgpl-3.0.html>
# Format version constants: embedded in the PDTCRYPT header on encryption and
# used as the class-level defaults for DeltaTar.crypto_version /
# crypto_paramversion below.
19 DELTATAR_HEADER_VERSION = 1
20 DELTATAR_PARAMETER_VERSION = 1
34 from functools import partial
# No-op logging handler attached to the library logger so that applications
# which configure no logging do not get "no handlers could be found" warnings.
# NOTE(review): the emit() body is elided in this excerpt (presumably `pass`);
# confirm against the full file.
39 class NullHandler(logging.Handler):
40 def emit(self, record):
44 logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
52 # encryption direction
53 CRYPTO_MODE_ENCRYPT = 0
54 CRYPTO_MODE_DECRYPT = 1
56 # The canonical extension for encrypted backup files regardless of the actual
57 # encryption parameters is “.pdtcrypt”. This is analogous to the encryption
58 # header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
59 # Since the introduction of the versioned header there is no longer any need
60 # for encoding encryption parameters in the file extensions (“.aes128” and
62 PDTCRYPT_EXTENSION = "pdtcrypt"
# Roles of auxiliary (non-volume) files; open_auxiliary_file() uses these to
# pick the per-role AES-GCM IV counter so index and info files never share IVs.
66 AUXILIARY_FILE_INDEX = 0
67 AUXILIARY_FILE_INFO = 1
# NOTE(review): this excerpt elides lines (original numbering skips); several
# attribute initializers and dict entries below are missing — confirm against
# the full file before relying on the exact defaults.
69 class DeltaTar(object):
71 Backup class used to create backups
74 # list of files to exclude in the backup creation or restore operation. It
75 # can contain python regular expressions.
78 # list of files to include in the backup creation or restore operation. It
79 # can contain python regular expressions. If empty, all files in the source
80 # path will be backed up (when creating a backup) or all the files in the
81 # backup will be restored (when restoring a backup), but if included_files
82 # is set then only the files included in the list will be processed.
85 # custom filter of files to be backed up (or restored). Unused and unset
86 # by default. The function receives a file path and must return a boolean.
89 # mode in which the delta will be created (when creating a backup) or
90 # opened (when restoring). Accepts modes analogous to the tarfile library.
93 # used together with aes modes to encrypt and decrypt backups.
98 # parameter version to use when encrypting; note that this has no effect
99 # on decryption since the required settings are determined from the headers
100 crypto_version = DELTATAR_HEADER_VERSION
101 crypto_paramversion = None
103 # when encrypting or decrypting, these hold crypto handlers; created before
104 # establishing the Tarfile stream iff a password is supplied.
108 # python logger object.
111 # specifies the index mode in the same format as @param mode, but without
112 # the ':', '|' or '#' at the beginning. It doesn't make sense to specify
113 # that the index is encrypted if no password is given in the constructor.
116 # current time for this backup. Used for file names and file creation checks
119 # extra data to be included in the header of the index file when creating a
123 # valid tarfile modes and their corresponding default file extension
124 __file_extensions_dict = {
133 '#gz.pdtcrypt': '.gz',
138 # valid index modes and their corresponding default file extension
139 __index_extensions_dict = {
143 'gz.pdtcrypt': '.gz',
147 # valid path prefixes
148 __path_prefix_list = [
# NOTE(review): mutable default arguments ([] for excluded_files /
# included_files) are shared across all instances; they are only stored here,
# but if any caller mutates them the lists leak between instances — consider
# a None default. Also note: several validation/assignment lines are elided
# in this excerpt (original numbering skips); confirm against the full file.
154 def __init__(self, excluded_files=[], included_files=[],
155 filter_func=None, mode="", password=None,
156 crypto_key=None, nacl=None,
157 crypto_version=DELTATAR_HEADER_VERSION,
158 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
159 logger=None, index_mode=None, index_name_func=None,
160 volume_name_func=None):
162 Constructor. Configures the diff engine.
165 - excluded_files: list of files to exclude in the backup creation or
166 restore operation. It can contain python regular expressions.
168 - included_files: list of files to include in the backup creation or
169 restore operation. It can contain python regular expressions. If
170 empty, all files in the source path will be backed up (when creating a
171 backup) or all the files in the backup will be restored (when
172 restoring a backup), but if included_files is set then only the files
173 include in the list will be processed.
175 - filter_func: custom filter of files to be backed up (or restored).
176 Unused and unset by default. The function receives a file path and
177 must return a boolean.
179 - mode: mode in which the delta will be created (when creating a backup)
180 or opened (when restoring). Accepts the same modes as the tarfile
181 library. Valid modes are:
184 ':' open uncompressed
185 ':gz' open with gzip compression
186 ':bz2' open with bzip2 compression
187 '|' open an uncompressed stream of tar blocks
188 '|gz' open a gzip compressed stream of tar blocks
189 '|bz2' open a bzip2 compressed stream of tar blocks
190 '#gz' open a stream of gzip compressed tar blocks
192 - crypto_key: used to encrypt and decrypt backups. Encryption will
193 be enabled automatically if a key is supplied. Requires a salt to be
196 - nacl: salt that was used to derive the encryption key for embedding
197 in the PDTCRYPT header. Not needed when decrypting and when
198 encrypting with password.
200 - password: used to encrypt and decrypt backups. Encryption will be
201 enabled automatically if a password is supplied.
203 - crypto_version: version of the format, determining the kind of PDT
206 - crypto_paramversion: optionally request encryption conforming to
207 a specific parameter version. Defaults to the standard PDT value
208 which as of 2017 is the only one available.
210 - logger: python logger object. Optional.
212 - index_mode: specifies the index mode in the same format as @param
213 mode, but without the ':', '|' or '#' at the begining. If encryption
214 is requested it will extend to the auxiliary (index, info) files as
215 well. This is an optional parameter that will automatically mimic
216 @param mode by default if not provided. Valid modes are:
219 'gz' open with gzip compression
220 'bz2' open with bzip2 compression
222 - index_name_func: function that sets a custom name for the index file.
223 This function receives a flag to indicate whether the name will be
224 used for a full or diff backup. The backup path will be prepended to
227 - volume_name_func: function that defines the name of tar volumes. It
228 receives the backup_path, if it's a full backup and the volume number,
229 and must return the name for the corresponding volume name. Optional,
230 DeltaTar has default names for tar volumes.
# reject file modes not listed in __file_extensions_dict up front
233 if mode not in self.__file_extensions_dict:
234 raise Exception('Unrecognized extension mode=[%s] requested for files'
237 self.excluded_files = excluded_files
238 self.included_files = included_files
239 self.filter_func = filter_func
240 self.logger = logging.getLogger('deltatar.DeltaTar')
# NOTE(review): the supplied `logger` is added as a *handler* to the library
# logger, so callers pass a handler-compatible object here, not a Logger.
242 self.logger.addHandler(logger)
245 if crypto_key is not None:
246 self.crypto_key = crypto_key
247 self.nacl = nacl # encryption only
249 if password is not None:
250 self.password = password
252 if crypto_version is not None:
253 self.crypto_version = crypto_version
255 if crypto_paramversion is not None:
256 self.crypto_paramversion = crypto_paramversion
258 # generate index_mode
259 if index_mode is None:
265 elif mode not in self.__index_extensions_dict:
266 raise Exception('Unrecognized extension mode=[%s] requested for index'
269 self.index_mode = index_mode
270 self.current_time = datetime.datetime.now()
# allow callers to override the default naming callbacks (method-hidden)
272 if index_name_func is not None:
273 self.index_name_func = index_name_func
275 if volume_name_func is not None:
276 self.volume_name_func = volume_name_func
# NOTE(review): this excerpt elides the PDT_TYPE_ARCHIVE branch body, the
# initial assignment of `ret` and the final `return ret`; the visible logic
# appends ".pdtcrypt" whenever encryption (key or password) is configured.
278 def pick_extension(self, kind, mode=None):
280 Choose the extension depending on a) the kind of file given, b) the
281 processing mode, and c) the current encryption settings.
284 if kind == PDT_TYPE_ARCHIVE:
287 mode = self.__index_extensions_dict [self.index_mode]
289 if self.crypto_key is not None or self.password is not None:
290 ret += "." + PDTCRYPT_EXTENSION
293 def index_name_func(self, is_full): # pylint: disable=method-hidden
295 Callback for setting a custom name for the index file. Depending on
296 whether *is_full* is set, it will create a suitable name for a full
# name pattern: "<bfull|bdiff>-<YYYY-MM-DD-HHMM>.index<ext>"
299 prefix = "bfull" if is_full else "bdiff"
300 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
# NOTE(review): the first argument to pick_extension is elided in this
# excerpt (presumably the aux-file kind constant); confirm in the full file.
301 extension = self.pick_extension \
303 self.__index_extensions_dict [self.index_mode])
305 return "%s-%s.index%s" % (prefix, date_str, extension)
307 def volume_name_func(self, backup_path, # pylint: disable=method-hidden
308 is_full, volume_number,
311 function that defines the name of tar volumes. It receives the
312 backup_path, if it's a full backup and the volume number, and must return
313 the name for the corresponding volume name. Optional, DeltaTar has default
314 names for tar volumes.
316 If guess_name is activated, the file is intended not to be created but
317 to be found, and thus the date will be guessed.
# name pattern: "<bfull|bdiff>-<YYYY-MM-DD-HHMM>-<NNN><ext>" (1-based volume)
319 prefix = "bfull" if is_full else "bdiff"
320 extension = self.pick_extension \
322 self.__file_extensions_dict [self.mode])
325 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
326 return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
# guess_name path: scan the backup directory for an existing file matching
# prefix/postfix instead of deriving the date from current_time.
# NOTE(review): the `return f` inside the loop is elided in this excerpt.
328 prefix = prefix + "-"
329 postfix = "-%03d%s" % (volume_number + 1, extension)
330 for f in os.listdir(backup_path):
331 if f.startswith(prefix) and f.endswith(postfix):
333 raise Exception("volume not found")
# NOTE(review): the lines assigning `match` (MATCH / PARENT_MATCH constants)
# and several early-exit returns are elided in this excerpt; only the
# branching skeleton is visible — confirm against the full file.
336 def filter_path(self, path, source_path="", is_dir=None):
338 Filters a path, given the source_path, using the filtering properties
339 set in the constructor.
340 The filtering order is:
341 1. included_files (if any)
343 3. filter_func (which must return whether the file is accepted or not)
# strip the source_path prefix so patterns match relative paths
346 if len(source_path) > 0:
347 # ensure that exactly one '/' at end of dir is also removed
348 source_path = source_path.rstrip(os.sep) + os.sep
349 path = path[len(source_path):]
351 # 1. filter included_files
353 if len(self.included_files) > 0:
355 for i in self.included_files:
356 # it can be either a regexp or a string
357 if isinstance(i, str):
358 # if the string matches, then continue
363 # if the string ends with / it's a directory, and if the
364 # path is contained in it, it is included
365 if i.endswith('/') and path.startswith(i):
369 # if the string doesn't end with /, add it and do the same
371 elif path.startswith(i + '/'):
375 # check for PARENT_MATCH
378 if not dir_path.endswith('/'):
381 if i.startswith(dir_path):
384 # if it's a reg exp, then we just check if it matches
385 elif isinstance(i, typing.Pattern):
390 self.logger.warning('Invalid pattern in included_files: %s' % str(i))
392 if match == NO_MATCH:
395 # when a directory is in PARENT_MATCH, it doesn't matter if it's
396 # excluded. Its subfiles will be excluded, but the directory itself
398 if match != PARENT_MATCH:
399 for e in self.excluded_files:
400 # it can be either a regexp or a string
401 if isinstance(e, str):
402 # if the string matches, then exclude
406 # if the string ends with / it's a directory, and if the
407 # path starts with the directory, then exclude
408 if e.endswith('/') and path.startswith(e):
411 # if the string doesn't end with /, do the same check with
413 elif path.startswith(e + '/'):
416 # if it's a reg exp, then we just check if it matches
417 elif isinstance(e, typing.Pattern):
421 self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
# 3. custom filter_func has the last word on accepted paths
424 return self.filter_func(path)
# Breadth-first directory walk driven by an explicit queue; children are
# filtered through filter_path() before being yielded or descended into.
# NOTE(review): several lines (keep_base_dir handling, queue.append for
# matched directories, os.close of dfd) are elided in this excerpt.
428 def _recursive_walk_dir(self, source_path, keep_base_dir=False):
430 Walk a directory recursively, yielding each file/directory
432 Returns the path of an entity. If ``keep_base_dir`` is set,
433 the path returned contains the prefix ``source_path``; otherwise it is
434 relative to the prefix.
437 source_path = source_path.rstrip(os.sep)
442 beginning_size = len(source_path) + 1 # +1 for os.sep
444 queue = [source_path]
# FIFO pop => breadth-first traversal order
447 cur_path = queue.pop(0)
450 dfd = os.open (cur_path, os.O_DIRECTORY)
451 except FileNotFoundError as exn:
# entry vanished between being queued and opened: warn and skip
452 self.logger.warning ("failed to open entity [%s] as directory; "
453 "file system (error: %s); skipping"
454 % (cur_path, str (exn)))
# sorted() gives a deterministic ordering required by collate_iterators
458 for filename in sorted(os.listdir(dfd)):
459 child = os.path.join(cur_path, filename)
460 is_dir = os.path.isdir(child)
461 status = self.filter_path(child, source_path, is_dir)
462 if status == NO_MATCH:
464 if not os.access(child, os.R_OK):
465 self.logger.warning('Error accessing possibly locked file %s' % child)
469 yield child[beginning_size:]
471 if is_dir and (status == MATCH or status == PARENT_MATCH):
# Build the per-file metadata record stored in the index; mtime/ctime are
# truncated to whole seconds via int().
# NOTE(review): the branch bodies assigning the 'type' value and the opening
# of the returned dict literal are elided in this excerpt.
476 def _stat_dict(self, path):
478 Returns a dict with the stat data used to compare files
480 stinfo = os.stat(path)
481 mode = stinfo.st_mode
484 if stat.S_ISDIR(mode):
486 elif stat.S_ISREG(mode):
488 elif stat.S_ISLNK(mode):
495 u'mtime': int(stinfo.st_mtime),
496 u'ctime': int(stinfo.st_ctime),
497 u'uid': stinfo.st_uid,
498 u'gid': stinfo.st_gid,
499 u'inode': stinfo.st_ino,
500 u'size': stinfo.st_size
# Compare two index stat dicts on the subset of keys that a restore actually
# reproduces; used by create_diff_backup to decide snapshot vs. list.
503 def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
505 Return if the dicts are equal in the stat keys
507 keys = [u'type', u'mode',u'size', u'mtime',
508 # not restored: u'inode', u'ctime'
511 # only if user is root, then also check gid/uid. otherwise do not check it,
512 # because tarfile can chown in case of being superuser only
514 # also, skip the check in rpmbuild since the sources end up with the
515 # uid:gid of the packager while the extracted files are 0:0.
516 if hasattr(os, "geteuid") and os.geteuid() == 0 \
517 and os.getenv ("RPMBUILD_OPTIONS") is None:
# one side missing/empty while the other is present => not equal
521 if (not d1 and d2 != None) or (d1 != None and not d2):
# paths are normalized through prefixed() so "list://" vs "snapshot://"
# prefixes compare equal when listsnapshot_equal is set
524 if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
527 type = d1.get('type', '')
530 # size doesn't matter for directories
531 if type == 'directory' and key == 'size':
# distinct sentinels (-1 / -2) guarantee a mismatch when a key is absent
533 if d1.get(key, -1) != d2.get(key, -2):
# NOTE(review): the `return path` for an already-prefixed path (non-list://
# case) is elided in this excerpt; visible logic rewrites "list://" to
# "snapshot://" when listsnapshot_equal is set, and prefixes bare paths.
537 def prefixed(self, path, listsnapshot_equal=False):
539 if a path is not prefixed, return it prefixed
541 for prefix in self.__path_prefix_list:
542 if path.startswith(prefix):
543 if listsnapshot_equal and prefix == u'list://':
544 return u'snapshot://' + path[len(prefix):]
# default: treat an unprefixed path as a snapshot entry
546 return u'snapshot://' + path
# Strip any known scheme prefix (snapshot://, list://, delete://, ...) from
# an index path.
# NOTE(review): the fallthrough `return path` for unprefixed input is elided
# in this excerpt.
548 def unprefixed(self, path):
550 remove a path prefix if any
552 for prefix in self.__path_prefix_list:
553 if path.startswith(prefix):
554 return path[len(prefix):]
# Factory for the crypto handler: returns crypto.Encrypt or crypto.Decrypt
# depending on *mode*, or (implicitly) None when neither a key nor a
# password is configured. Raises on an unknown mode value.
558 def initialize_encryption (self, mode, strict_validation=True):
560 :type strict_validation: bool
561 :param strict_validation: Enable strict IV checking in the crypto
562 layer. Should be disabled when dealing with
563 potentially corrupted data.
565 password = self.password
566 key = self.crypto_key
# no credentials => encryption disabled (early exit; return value elided
# in this excerpt, presumably None)
569 if key is None and password is None:
571 if mode == CRYPTO_MODE_ENCRYPT:
572 return crypto.Encrypt (password=password,
575 version=self.crypto_version,
576 paramversion=self.crypto_paramversion)
577 if mode == CRYPTO_MODE_DECRYPT:
578 return crypto.Decrypt (password=password, key=key,
579 strict_ivs=strict_validation)
581 raise Exception ("invalid encryption mode [%r]" % mode)
# Open an index/info file as a tarfile._Stream, inheriting compression from
# index_mode and encryption from the configured credentials. The IV counter
# is chosen per file role so the two aux files never reuse IVs.
# NOTE(review): the comptype default branch, the mode=='w'/'r' dispatch and
# the final return are elided in this excerpt.
584 def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX,
585 strict_validation=True):
587 Given the specified configuration, opens a file for reading or writing,
588 inheriting the encryption and compression settings from the backup.
589 Returns a file object ready to use.
591 :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
594 :param kind: Role of the file, see AUXILIARY_FILE_* constants.
595 Both the info and the auxiliary file have a globally
596 unique, constant counter value.
599 if self.index_mode.startswith('gz'):
601 elif self.index_mode.startswith('bz2'):
609 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
611 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT,
612 strict_validation=strict_validation)
614 if crypto_ctx is not None:
615 if kind == AUXILIARY_FILE_INFO:
616 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
617 elif kind == AUXILIARY_FILE_INDEX:
618 enccounter = crypto.AES_GCM_IV_CNT_INDEX
620 raise Exception ("invalid kind of aux file %r" % kind)
# private tarfile stream API of the bundled tarfile fork
622 sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
623 bufsize=tarfile.RECORDSIZE, fileobj=None,
624 encryption=crypto_ctx, enccounter=enccounter)
# NOTE(review): this excerpt elides lines throughout (original numbering
# skips): the try/except around json.dumps, cwd capture, index_sink.write
# calls for the checksummed records, and tarobj.close() among others —
# confirm against the full file.
629 def create_full_backup(self, source_path, backup_path,
630 max_volume_size=None, extra_data=dict()):
632 Creates a full backup.
635 - source_path: source path to the directory to back up.
636 - backup_path: path where the back up will be stored. Backup path will
637 be created if not existent.
638 - max_volume_size: maximum volume size in megabytes. Used to split the
639 backup in volumes. Optional (won't split in volumes by default).
640 - extra_data: a json-serializable dictionary with information that you
641 want to be included in the header of the index file
# --- input validation -------------------------------------------------
644 if not isinstance(source_path, str):
645 raise Exception('Source path must be a string')
647 if not isinstance(backup_path, str):
648 raise Exception('Backup path must be a string')
650 if not os.path.exists(source_path) or not os.path.isdir(source_path):
651 raise Exception('Source path "%s" does not exist or is not a '\
652 'directory' % source_path)
654 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
655 max_volume_size < 1):
656 raise Exception('max_volume_size must be a positive integer')
657 if max_volume_size != None:
658 max_volume_size = max_volume_size*1024*1024
660 if not isinstance(extra_data, dict):
661 raise Exception('extra_data must be a dictionary')
664 extra_data_str = json.dumps(extra_data)
666 raise Exception('extra_data is not json-serializable')
668 if not os.access(source_path, os.R_OK):
669 raise Exception('Source path "%s" is not readable' % source_path)
671 # try to create backup path if needed
672 os.makedirs(backup_path, exist_ok=True)
674 if not os.access(backup_path, os.W_OK):
675 raise Exception('Backup path "%s" is not writeable' % backup_path)
677 if source_path.endswith('/'):
678 source_path = source_path[:-1]
680 if backup_path.endswith('/'):
681 backup_path = backup_path[:-1]
683 # update current time
684 self.current_time = datetime.datetime.now()
686 if self.mode not in self.__file_extensions_dict:
687 raise Exception('Unrecognized extension')
689 # setup for encrypting payload
690 if self.encryptor is None:
691 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
693 # some initialization
696 # generate the first volume name
697 vol_name = self.volume_name_func(backup_path, True, 0)
698 tarfile_path = os.path.join(backup_path, vol_name)
# open the index sink with the backup's compression/encryption settings
701 index_name = self.index_name_func(True)
702 index_path = os.path.join(backup_path, index_name)
703 index_sink = self.open_auxiliary_file(index_path, 'w')
707 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
709 Handles the new volumes
711 volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
712 volume_path = os.path.join(backup_path, volume_name)
713 deltarobj.vol_no = volume_number
715 # we convert relative paths into absolute because CWD is changed
716 if not os.path.isabs(volume_path):
717 volume_path = os.path.join(cwd, volume_path)
# close the previous volume's file object before switching
719 if tarobj.fileobj is not None:
720 tarobj.fileobj.close()
722 deltarobj.logger.debug("opening volume %s" % volume_path)
724 tarobj.open_volume(volume_path, encryption=encryption)
726 # wraps some args from context into the handler
727 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
729 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
731 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
732 # calculate checksum and write into the stream
733 crc = binascii.crc32(s) & 0xFFFFffff
736 # start creating the tarfile
737 tarobj = tarfile.TarFile.open(tarfile_path,
738 mode='w' + self.mode,
739 format=tarfile.GNU_FORMAT,
740 concat='#' in self.mode,
741 encryption=self.encryptor,
742 max_volume_size=max_volume_size,
743 new_volume_handler=new_volume_handler,
744 save_to_members=False,
# chdir so archive members carry paths relative to source_path
746 os.chdir(source_path)
748 # for each file to be in the backup, do:
749 for path in self._recursive_walk_dir('.'):
752 # calculate stat dict for current file
753 statd = self._stat_dict(path)
754 statd['path'] = u'snapshot://' + statd['path']
755 statd['volume'] = self.vol_no
758 tarobj.add(path, arcname = statd['path'], recursive=False)
759 except FileNotFoundError as exn:
760 # file vanished since the call to access(3) above
761 self.logger.warning ("object [%s] no longer available in "
762 "file system (error: %s); skipping"
764 continue # prevent indexing
766 # retrieve file offset
767 statd['offset'] = tarobj.get_last_member_offset()
768 self.logger.debug("backup %s" % statd['path'])
770 # store the stat dict in the index
771 s = bytes(json.dumps(statd) + '\n', 'UTF-8')
772 crc = binascii.crc32(s, crc) & 0xffffffff
# trailer records: END-FILE-LIST plus the running CRC32 of all list lines
775 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
776 crc = binascii.crc32(s, crc) & 0xffffffff
778 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
783 index_sink.close (close_fileobj=True)
# NOTE(review): this excerpt elides lines throughout (original numbering
# skips): the action-selection assignments, index_sink.write calls, context
# managers around the iterators, and final close calls among others —
# confirm against the full file.
785 def create_diff_backup(self, source_path, backup_path, previous_index_path,
786 max_volume_size=None, extra_data=dict()):
791 - source_path: source path to the directory to back up.
792 - backup_path: path where the back up will be stored. Backup path will
793 be created if not existent.
794 - previous_index_path: index of the previous backup, needed to know
795 which files changed since then.
796 - max_volume_size: maximum volume size in megabytes (MB). Used to split
797 the backup in volumes. Optional (won't split in volumes by default).
799 NOTE: previous index is assumed to follow exactly the same format as
800 the index_mode setup in the constructor.
802 # check/sanitize input
803 if not isinstance(source_path, str):
804 raise Exception('Source path must be a string')
806 if not isinstance(backup_path, str):
807 raise Exception('Backup path must be a string')
809 if not os.path.exists(source_path) or not os.path.isdir(source_path):
810 raise Exception('Source path "%s" does not exist or is not a '\
811 'directory' % source_path)
813 if not isinstance(extra_data, dict):
814 raise Exception('extra_data must be a dictionary')
817 extra_data_str = json.dumps(extra_data)
819 raise Exception('extra_data is not json-serializable')
821 if not os.access(source_path, os.R_OK):
822 raise Exception('Source path "%s" is not readable' % source_path)
824 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
825 max_volume_size < 1):
826 raise Exception('max_volume_size must be a positive integer')
827 if max_volume_size != None:
828 max_volume_size = max_volume_size*1024*1024
830 if not isinstance(previous_index_path, str):
831 raise Exception('previous_index_path must be A string')
833 if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
834 raise Exception('Index path "%s" does not exist or is not a '\
835 'file' % previous_index_path)
837 if not os.access(previous_index_path, os.R_OK):
838 raise Exception('Index path "%s" is not readable' % previous_index_path)
840 # try to create backup path if needed
841 os.makedirs(backup_path, exist_ok=True)
843 if not os.access(backup_path, os.W_OK):
844 raise Exception('Backup path "%s" is not writeable' % backup_path)
846 if source_path.endswith('/'):
847 source_path = source_path[:-1]
849 if backup_path.endswith('/'):
850 backup_path = backup_path[:-1]
852 # update current time
853 self.current_time = datetime.datetime.now()
855 if self.mode not in self.__file_extensions_dict:
856 raise Exception('Unrecognized extension')
858 # setup for encrypting payload
859 if self.encryptor is None:
860 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
862 # some initialization
865 # generate the first volume name
866 vol_name = self.volume_name_func(backup_path, is_full=False,
868 tarfile_path = os.path.join(backup_path, vol_name)
873 index_name = self.index_name_func(is_full=False)
874 index_path = os.path.join(backup_path, index_name)
875 index_sink = self.open_auxiliary_file(index_path, 'w')
877 def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
879 Handles the new volumes
881 volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
882 volume_number=volume_number)
883 volume_path = os.path.join(backup_path, volume_name)
884 deltarobj.vol_no = volume_number
886 # we convert relative paths into absolute because CWD is changed
887 if not os.path.isabs(volume_path):
888 volume_path = os.path.join(cwd, volume_path)
890 deltarobj.logger.debug("opening volume %s" % volume_path)
891 tarobj.open_volume(volume_path)
893 # wraps some args from context into the handler
894 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
896 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
898 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
899 # calculate checksum and write into the stream
900 crc = binascii.crc32(s) & 0xFFFFffff
903 # start creating the tarfile
904 tarobj = tarfile.TarFile.open(tarfile_path,
905 mode='w' + self.mode,
906 format=tarfile.GNU_FORMAT,
907 concat='#' in self.mode,
908 encryption=self.encryptor,
909 max_volume_size=max_volume_size,
910 new_volume_handler=new_volume_handler,
911 save_to_members=False,
915 # create the iterators, first the previous index iterator, then the
916 # source path directory iterator and collate and iterate them
917 if not os.path.isabs(previous_index_path):
918 previous_index_path = os.path.join(cwd, previous_index_path)
919 index_it = self.iterate_index_path(previous_index_path)
921 os.chdir(source_path)
922 dir_it = self._recursive_walk_dir('.')
923 dir_path_it = self.jsonize_path_iterator(dir_it)
931 # for each file to be in the backup, do:
932 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
934 # if file is not in the index, it means it's a new file, so we have
939 # if the file is not in the directory iterator, it means that it has
940 # been deleted, so we need to mark it as such
943 # if the file is in both iterators, it means it might have either
944 # not changed (in which case we will just list it in our index but
945 # it will not be included in the tar file), or it might have
946 # changed, in which case we will snapshot it.
947 elif ipath and dpath:
948 if self._equal_stat_dicts(ipath, dpath):
952 # TODO: when creating chained backups (i.e. diffing from another
953 # diff), we will need to detect the type of action in the previous
954 # index, because if it was delete and dpath is None, we should
# dispatch on the chosen action: snapshot / delete / list
957 if action == 'snapshot':
958 # calculate stat dict for current file
960 stat['path'] = "snapshot://" + dpath['path']
961 stat['volume'] = self.vol_no
963 self.logger.debug("[STORE] %s" % dpath['path'])
966 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
967 # retrieve file offset
968 stat['offset'] = tarobj.get_last_member_offset()
969 except FileNotFoundError as exn:
970 # file vanished since the call to access(3) above
971 self.logger.warning ("object [%s] no longer available in "
972 "file system (error: %s); skipping"
973 % (dpath ["path"], str (exn)))
974 stat = None # prevent indexing
976 elif action == 'delete':
977 path = self.unprefixed(ipath['path'])
979 u'path': u'delete://' + path,
980 u'type': ipath['type']
982 self.logger.debug("[DELETE] %s" % path)
984 # mark it as deleted in the backup
985 tarobj.add("/dev/null", arcname=stat['path'])
986 elif action == 'list':
988 path = self.unprefixed(ipath['path'])
989 stat['path'] = u'list://' + path
990 # unchanged files do not enter in the backup, only in the index
991 self.logger.debug("[UNCHANGED] %s" % path)
994 self.logger.warning('unknown action in create_diff_backup: {0}'
999 # store the stat dict in the index
1000 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
1001 crc = binascii.crc32(s, crc) & 0xffffffff
# trailer records: END-FILE-LIST plus the running CRC32 of all list lines
1004 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
1005 crc = binascii.crc32(s, crc) & 0xffffffff
1007 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
# NOTE(review): this excerpt elides lines inside the nested iterator class
# (__iter__, file-close logic, parts of __next__) — confirm against the
# full file.
1016 def iterate_index_path(self, index_path, strict_validation=True):
1018 Returns an index iterator. Internally, it uses a classic iterator class.
1019 We do that instead of just yielding so that the iterator object can have
1020 an additional function to close the file descriptor that is opened in
1024 class IndexPathIterator(object):
1025 def __init__(self, delta_tar, index_path):
1026 self.delta_tar = delta_tar
1027 self.index_path = index_path
1029 self.extra_data = dict()
1039 def __enter__(self):
1041 Allows this iterator to be used with the "with" statement
1044 self.f = self.delta_tar.open_auxiliary_file \
1047 strict_validation=strict_validation)
1048 # check index header
1049 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1050 if j.get("type", '') != 'python-delta-tar-index' or\
1051 j.get('version', -1) != 1:
1052 raise Exception("invalid index file format: %s" % json.dumps(j))
1054 self.extra_data = j.get('extra_data', dict())
1056 # find BEGIN-FILE-LIST, ignore other headers
1058 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1059 if j.get('type', '') == 'BEGIN-FILE-LIST':
1063 def __exit__(self, type, value, tb):
1065 Allows this iterator to be used with the "with" statement
1072 # read each file in the index and process it to do the restore
1076 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1077 except Exception as e:
1082 op_type = j.get('type', '')
1084 # when we detect the end of the list, break the loop
1085 if op_type == 'END-FILE-LIST':
# unknown entry types are logged and skipped by recursing to the next line
1091 if op_type not in ['directory', 'file', 'link']:
1092 self.delta_tar.logger.warning('unrecognized type to be '
1093 'restored: %s, line %d' % (op_type, l_no))
1095 return self.__next__()
1099 return IndexPathIterator(self, index_path)
# NOTE(review): this excerpt elides lines inside TarPathIterator (close(),
# __iter__, decryptor keyword arguments, the returned dict opening and the
# member field) — confirm against the full file.
1101 def iterate_tar_path(self, tar_path, new_volume_handler=None):
1103 Returns a tar iterator that iterates jsonized member items that contain
1104 an additional "member" field, used by RestoreHelper.
1106 class TarPathIterator(object):
1107 def __init__(self, delta_tar, tar_path, new_volume_handler=None):
1108 self.delta_tar = delta_tar
1109 self.tar_path = tar_path
1111 self.last_member = None
1112 self.new_volume_handler = new_volume_handler
1120 self.tar_obj.close()
1122 def __enter__(self):
1124 Allows this iterator to be used with the "with" statement
1126 if self.tar_obj is None:
# decryption context is built lazily on first entry, only when a
# password is configured
1128 if self.delta_tar.password is not None:
1129 decryptor = crypto.Decrypt \
1130 (password=self.delta_tar.password,
1131 key=self.delta_tar.crypto_key,
1133 self.tar_obj = tarfile.TarFile.open(self.tar_path,
1134 mode='r' + self.delta_tar.mode,
1135 format=tarfile.GNU_FORMAT,
1136 concat='#' in self.delta_tar.mode,
1137 encryption=decryptor,
1138 new_volume_handler=self.new_volume_handler,
1139 save_to_members=False,
1143 def __exit__(self, type, value, tb):
1145 Allows this iterator to be used with the "with" statement
1148 self.tar_obj.close()
1153 Read each member and return it as a stat dict
1155 tarinfo = self.tar_obj.__iter__().__next__()
1156 # NOTE: here we compare if tarinfo.path is the same as before
1157 # instead of comparing the tarinfo object itself because the
1158 # object itself might change for multivol tarinfos
1159 if tarinfo is None or (self.last_member is not None and\
1160 self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
1163 self.last_member = tarinfo
# map tarinfo kind to the index 'type' value (assignments elided here)
1166 if tarinfo.isfile():
1168 elif tarinfo.isdir():
1170 elif tarinfo.islnk() or tarinfo.issym():
1175 u'path': tarinfo.path,
1176 u'mode': tarinfo.mode,
1177 u'mtime': tarinfo.mtime,
1178 u'ctime': -1, # cannot restore
1179 u'uid': tarinfo.uid,
1180 u'gid': tarinfo.gid,
1181 u'inode': -1, # cannot restore
1182 u'size': tarinfo.size,
1186 return TarPathIterator(self, tar_path, new_volume_handler)
# Adapt a plain path iterator so each yielded filesystem path becomes a
# (stat-dict, 0) pair, optionally stripping leading path components.
# NOTE(review): embedded line numbers are non-contiguous here; the loop and
# try scaffolding around these lines is missing from this excerpt.
1188 def jsonize_path_iterator(self, iter, strip=0):
1190 converts the yielded items of an iterator into json path lines.
1192 strip: Strip the smallest prefix containing num leading slashes from
1197 path = iter.__next__()
# strip == 0 path: yield the stat dict unchanged ...
1199 yield self._stat_dict(path), 0
# ... otherwise drop the first `strip` '/'-separated components first.
1201 st = self._stat_dict(path)
1202 st['path'] = "/".join(path.split("/")[strip:])
1204 except StopIteration:
# Present an already-materialized index (a plain list of entries, as built
# by the rescue path) through the same iterator interface as the path and
# tar iterators, so restore_backup can consume it uniformly.
1207 def iterate_disaster_index (self, index):
1209 Mimick the behavior of the other object iterators, just with the inputs
1210 supplied directly as *index*.
1213 class RawIndexIterator(object):
1214 def __init__(self, delta_tar, index):
1215 self.delta_tar = delta_tar
1225 def __enter__(self):
1227 Allows this iterator to be used with the "with" statement
1229 self.iter = self.index.__iter__ ()
1232 def __exit__(self, type, value, tb):
1234 Allows this iterator to be used with the "with" statement
# Advance the underlying list iterator (the return of `idxent` is on a
# line missing from this excerpt).
1238 idxent = self.iter.__next__ ()
1241 return RawIndexIterator(self, index)
# Merge-walk two ordered iterators, pairing equal items and emitting
# one-sided tuples for unmatched items -- the core of the index-vs-disk
# comparison in restore_backup.
# NOTE(review): embedded line numbers are non-contiguous; the while-loop and
# several branch conditions are missing from this excerpt.
1243 def collate_iterators(self, it1, it2):
1245 Collate two iterators, so that it returns pairs of the items of each
1246 iterator (if the items are the same), or (None, elem2) or (elem1, None)
1247 when there's no match for the items in the other iterator.
1249 It assumes that the items in both lists are ordered in the same way.
1252 elem1, elem2 = None, None
# Advance it1; when exhausted, drain it2 yielding (None, elem2, ...) pairs.
1256 elem1, l_no = it1.__next__()
1257 except StopIteration:
1259 yield (None, elem2, l_no)
# it2 may yield bare elements or tuples; the tuple case is unpacked on
# lines missing from this excerpt.
1261 if isinstance(elem2, tuple):
1263 yield (None, elem2, l_no)
# Advance it2; when exhausted, drain it1 yielding (elem1, None, ...) pairs.
1267 elem2 = it2.__next__()
1268 if isinstance(elem2, tuple):
1270 except StopIteration:
1272 yield (elem1, None, l_no)
1273 for elem1, l_no in it1:
1274 yield (elem1, None, l_no)
# Compare the unprefixed paths of the two current elements to decide which
# side(s) to emit this round (compare_indexes defines the ordering).
1277 index1 = self.unprefixed(elem1['path'])
1278 index2 = self.unprefixed(elem2['path'])
1279 i1, i2 = self.compare_indexes(index1, index2)
1281 yield1 = yield2 = None
1288 yield (yield1, yield2, l_no)
# Ordering predicate for collate_iterators: decides which of two
# '/'-separated paths sorts first under the index traversal order.
1290 def compare_indexes(self, index1, index2):
1292 Compare iterator indexes and return a tuple in the following form:
1293 if index1 < index2, returns (index1, None)
1294 if index1 == index2 returns (index1, index2)
1295 else: returns (None, index2)
1297 l1 = index1.split('/')
1298 l2 = index2.split('/')
# Paths are first ordered by depth (component count) ...
1299 length = len(l2) - len(l1)
1302 return (index1, None)
1304 return (None, index2)
# ... then component-wise; the comparison conditions guarding the two
# early returns below sit on lines missing from this excerpt.
1306 for i1, i2 in zip(l1, l2):
1308 return (index1, None)
1310 return (None, index2)
# All components equal: the paths are the same index entry.
1312 return (index1, index2)
# List the contents of a backup archive, logging each member path (or
# handing each member to list_func when provided).
# NOTE(review): embedded line numbers are non-contiguous; some statements
# (e.g. the list_func invocation around 1362-1364) are missing here.
1314 def list_backup(self, backup_tar_path, list_func=None):
1315 if not isinstance(backup_tar_path, str):
1316 raise Exception('Backup tar path must be a string')
1318 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1319 raise Exception('Source path "%s" does not exist or is not a '\
1320 'file' % backup_tar_path)
1322 if not os.access(backup_tar_path, os.R_OK):
1323 raise Exception('Source path "%s" is not readable' % backup_tar_path)
# Volume-change callback: derive the next volume's filename and reopen it
# on the same tar object.
1327 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
1329 Handles the new volumes
1331 volume_name = deltarobj.volume_name_func(backup_path, True,
1332 volume_number, guess_name=True)
1333 volume_path = os.path.join(backup_path, volume_name)
1335 # we convert relative paths into absolute because CWD is changed
1336 if not os.path.isabs(volume_path):
1337 volume_path = os.path.join(cwd, volume_path)
1338 tarobj.open_volume(volume_path, encryption=encryption)
# Lazily initialize the decryptor; listing does not need strict IV checks.
1340 if self.decryptor is None:
1342 self.initialize_encryption (CRYPTO_MODE_DECRYPT,
1343 strict_validation=False)
1345 backup_path = os.path.dirname(backup_tar_path)
1346 if not os.path.isabs(backup_path):
1347 backup_path = os.path.join(cwd, backup_path)
# Pre-bind the handler's leading arguments; tarfile supplies the rest.
1348 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)
1350 tarobj = tarfile.TarFile.open(backup_tar_path,
1351 mode='r' + self.mode,
1352 format=tarfile.GNU_FORMAT,
1353 concat='#' in self.mode,
1354 encryption=self.decryptor,
1355 new_volume_handler=new_volume_handler,
1356 save_to_members=False,
# Per-member filter: default behavior logs each member path.
1359 def filter(cls, list_func, tarinfo):
1360 if list_func is None:
1361 self.logger.info(tarinfo.path)
1365 filter = partial(filter, self, list_func)
1367 tarobj.extractall(filter=filter, unlink=True)
# Restore a backup into target_path, either directly from a tar archive,
# from one or more index files (diff mode), or from a raw in-memory index
# (disaster mode). Returns a list of (path, exception) pairs for files
# that could not be restored.
# NOTE(review): embedded original line numbers are non-contiguous
# throughout this method; several statements (mode selection, try/else
# scaffolding, continues) are missing from this excerpt, so the comments
# below describe only the visible code.
1370 def restore_backup(self, target_path, backup_indexes_paths=[],
1371 backup_tar_path=None, restore_callback=None,
1372 disaster=tarfile.TOLERANCE_STRICT, backup_index=None,
1373 strict_validation=True):
1378 - target_path: path to restore.
1379 - backup_indexes_paths: path to backup indexes, in descending date order.
1380 The indexes indicate the location of their respective backup volumes,
1381 and multiple indexes are needed to be able to restore diff backups.
1382 Note that this is an optional parameter: if not suplied, it will
1383 try to restore directly from backup_tar_path.
1384 - backup_tar_path: path to the backup tar file. Used as an alternative
1385 to backup_indexes_paths to restore directly from a tar file without
1386 using any file index. If it's a multivol tarfile, volume_name_func
1388 - restore_callback: callback function to be called during restore.
1389 This is passed to the helper and gets called for every file.
1391 NOTE: If you want to use an index to restore a backup, this function
1392 only supports to do so when the tarfile mode is either uncompressed or
1393 uses concat compress mode, because otherwise it would be very slow.
1395 NOTE: Indices are assumed to follow the same format as the index_mode
1396 specified in the constructor.
1398 Returns the list of files that could not be restored, if there were
1401 # check/sanitize input
1402 if not isinstance(target_path, str):
1403 raise Exception('Target path must be a string')
# NOTE(review): `backup_tar_path == []` is odd for a parameter whose
# default is None -- presumably intended as an emptiness check; confirm
# against upstream before changing.
1405 if backup_indexes_paths is None and backup_tar_path == []:
1406 raise Exception("You have to either provide index paths or a tar path")
# Mode selection: a raw index list implies disaster mode, no index paths
# implies direct tar mode (the assignments sit on missing lines).
1408 if isinstance (backup_index, list) is True:
1410 elif len(backup_indexes_paths) == 0:
1416 if not isinstance(backup_tar_path, str):
1417 raise Exception('Backup tar path must be a string')
1419 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1420 raise Exception('Source path "%s" does not exist or is not a '\
1421 'file' % backup_tar_path)
1423 if not os.access(backup_tar_path, os.R_OK):
1424 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1426 if not isinstance(backup_indexes_paths, list):
1427 raise Exception('backup_indexes_paths must be a list')
# Index-based restore requires seekable archives; stream (':') and pipe
# ('|') compression modes cannot be seeked into.
1429 if self.mode.startswith(':') or self.mode.startswith('|'):
1430 raise Exception('Restore only supports either uncompressed tars'
1431 ' or concat compression when restoring from an index, and '
1432 ' the open mode you provided is "%s"' % self.mode)
1434 for index in backup_indexes_paths:
1435 if not isinstance(index, str):
1436 raise Exception('indices must be strings')
1438 if not os.path.exists(index) or not os.path.isfile(index):
1439 raise Exception('Index path "%s" does not exist or is not a '\
1442 if not os.access(index, os.R_OK):
1443 raise Exception('Index path "%s" is not readable' % index)
1445 # try to create backup path if needed
1446 os.makedirs(target_path, exist_ok=True)
1448 # make backup_tar_path absolute so that iterate_tar_path works fine
1449 if backup_tar_path and not os.path.isabs(backup_tar_path):
1450 backup_tar_path = os.path.abspath(backup_tar_path)
# The restore runs with CWD set to the target; helpers receive the old
# cwd so relative volume paths still resolve.
1453 os.chdir(target_path)
1455 # setup for decrypting payload
1456 if self.decryptor is None:
1458 self.initialize_encryption (CRYPTO_MODE_DECRYPT,
1459 strict_validation=strict_validation)
# Build the index iterator and RestoreHelper appropriate for the mode.
1462 index_it = self.iterate_tar_path(backup_tar_path)
1463 helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
1464 tarobj=index_it.tar_obj)
1465 elif mode == "diff":
1466 helper = RestoreHelper(self, cwd, backup_indexes_paths,
1469 # get iterator from newest index at _data[0]
1470 index1 = helper._data[0]["path"]
1472 self.iterate_index_path(index1,
1473 strict_validation=strict_validation)
# Opening the newest index may fail for decryption or format reasons;
# both cases abort early, reporting the index itself as the failure.
1474 except tarfile.DecryptionError as exn:
1475 self.logger.error("failed to decrypt file [%s]: %s; is this an "
1476 "actual encrypted index file?"
1477 % (index1, str (exn)))
1478 return [(index1, exn)]
1479 except Exception as exn:
1481 self.logger.error("failed to read file [%s]: %s; is this an "
1482 "actual index file?" % (index1, str (exn)))
1483 return [(index1, exn)]
1484 elif mode == "disaster":
1485 index_it = self.iterate_disaster_index (backup_index)
1486 helper = RestoreHelper (self, cwd, backup_path=backup_tar_path,
1487 backup_index=backup_index,
1490 index_decryptor = helper._data[0]["decryptor"]
# Walk the target directory in lock-step with the index.
1492 dir_it = self._recursive_walk_dir('.')
1493 dir_path_it = self.jsonize_path_iterator(dir_it)
1495 failed = [] # irrecoverable files
1497 # for each file to be restored, do:
1498 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
1500 upath = dpath['path']
1501 op_type = dpath['type']
1503 upath = self.unprefixed(ipath['path'])
1504 op_type = ipath['type']
# Honor the configured include/exclude filters.
1507 if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
1510 # if types of the file mismatch, the file needs to be deleted
1512 if ipath is not None and dpath is not None and\
1513 dpath['type'] != ipath['type']:
1514 helper.delete(upath)
1516 # if file not found in dpath, we can directly restore from index
1518 # if the file doesn't exist and it needs to be deleted, it
1519 # means that work is already done
1520 if ipath['path'].startswith('delete://'):
1523 self.logger.debug("restore %s" % ipath['path'])
1524 helper.restore(ipath, l_no, restore_callback)
1525 except Exception as e:
1526 iipath = ipath.get ("path", "")
1527 self.logger.error("FAILED to restore: {} ({})"
# In strict tolerance a failure propagates (the re-raise is on a missing
# line); otherwise it is recorded and the restore continues.
1529 if disaster != tarfile.TOLERANCE_STRICT:
1530 failed.append ((iipath, e))
1533 # if both files are equal, we have nothing to restore
1534 if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
1537 # we have to restore the file, but first we need to delete the
1538 # current existing file.
1539 # we don't delete the file if it's a directory, because it might
1540 # just have changed mtime, so it's quite inefficient to remove
1543 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
1544 helper.delete(upath)
1545 self.logger.debug("restore %s" % ipath['path'])
1547 helper.restore(ipath, l_no, restore_callback)
1548 except Exception as e:
1549 if disaster == tarfile.TOLERANCE_STRICT:
1551 failed.append ((ipath.get ("path", ""), e))
1554 # if the file is not in the index (so it comes from the target
1555 # directory) then we have to delete it
1557 self.logger.debug("delete %s" % upath)
1558 helper.delete(upath)
# Directory metadata is applied last, after all contents exist.
1560 helper.restore_directories_permissions()
# Convenience wrapper around restore_backup() running with recovery
# tolerance and relaxed cryptographic validation.
1568 def recover_backup(self, target_path, backup_indexes_paths=[],
1569 restore_callback=None):
1571 Walk the index, extracting objects in disaster mode. Bad files are
1572 reported along with a reason.
# NOTE(review): mutable default argument (backup_indexes_paths=[]);
# harmless here since the list is only passed through, never mutated,
# but worth normalizing to None in a wider cleanup.
1574 return self.restore_backup(target_path,
1575 backup_indexes_paths=backup_indexes_paths,
1576 disaster=tarfile.TOLERANCE_RECOVER,
1577 strict_validation=False)
# Last-resort restore: ignore the (possibly corrupt) index files, scan the
# volumes themselves for object headers, and restore from that synthetic
# index with rescue tolerance.
1580 def rescue_backup(self, target_path, backup_tar_path,
1581 restore_callback=None):
1583 More aggressive “unfsck” mode: do not rely on the index data as the
1584 files may be corrupt; skim files for header-like information and
1585 attempt to retrieve the data.
# Compute the on-disk name of volume number `nvol` next to the backup tar
# (trailing arguments of volume_name_func are on lines missing here).
1587 def gen_volume_name (nvol):
1588 return os.path.join (os.path.dirname (backup_tar_path),
1589 self.volume_name_func (backup_tar_path,
# Build a synthetic index by scanning the volumes for object headers.
1593 backup_index = tarfile.gen_rescue_index (gen_volume_name,
1595 password=self.password,
1596 key=self.crypto_key)
1598 return self.restore_backup(target_path,
1599 backup_index=backup_index,
1600 backup_tar_path=backup_tar_path,
1601 disaster=tarfile.TOLERANCE_RESCUE,
1602 strict_validation=False)
# Parse one line of an index file as JSON, converting low-level decode
# errors into diagnostics that mention the line number and a hex preview.
1605 def _parse_json_line(self, f, l_no):
1607 Read line from file like object and process it as JSON.
# Decode the raw bytes as UTF-8 and parse (the read of `l` from `f` is on
# a line missing from this excerpt).
1612 j = json.loads(l.decode('UTF-8'))
1613 except UnicodeDecodeError as e:
# Distinguish "this is a gzip blob" from generic binary garbage so the
# error message is actionable for the user.
1614 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1616 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1617 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1620 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1621 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1623 except ValueError as e:
1624 raise Exception("error parsing this json line "
1625 "(line number %d): %s" % (l_no, l))
# Helper that owns the per-index state (open volume fds, tar objects,
# iterators, decryptors) needed to restore individual files during
# restore_backup().
# NOTE(review): the embedded original line numbers are non-contiguous
# throughout this class; many statements (returns, try/else scaffolding,
# dict fields) are missing from this excerpt, so comments below describe
# only the visible code -- confirm details against upstream.
1629 class RestoreHelper(object):
1631 Class used to help to restore files from indices
1634 # holds the dicts of data
1641 # list of directories to be restored. This is done as a last step, see
1642 # tarfile.extractall for details.
1645 _disaster = tarfile.TOLERANCE_STRICT
1647 def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
1648 backup_index=None, tarobj=None,
1649 disaster=tarfile.TOLERANCE_STRICT):
1651 Constructor opens the tars and init the data structures.
1655 - Index list must be provided in reverse order (newer first).
1656 - “newer first” apparently means that if there are n backups
1657 provided, the last full backup is at index n-1 and the most recent
1658 diff backup is at index 0.
1659 - Only the first, the second, and the last elements of
1660 ``index_list`` are relevant, others will not be accessed.
1661 - If no ``index_list`` is provided, both ``tarobj`` and
1662 ``backup_path`` must be passed.
1663 - If ``index_list`` is provided, the values of ``tarobj`` and
1664 ``backup_path`` are ignored.
1667 self._directories = []
1668 self._deltatar = deltatar
1670 self._password = deltatar.password
1671 self._crypto_key = deltatar.crypto_key
1672 self._decryptors = []
1673 self._disaster = disaster
1675 # Disable strict checking for linearly increasing IVs when running
1676 # in rescue or recover mode.
1677 strict_validation = disaster == tarfile.TOLERANCE_STRICT
# Ownership can only be restored when running as root (and the pwd
# module imported successfully).
1684 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1685 self.canchown = True
1687 self.canchown = False
# Three construction modes: raw in-memory index (disaster), list of index
# files (diff), or a single already-open tar object (direct).
1689 if isinstance (backup_index, list) is True:
1690 decryptor = self._deltatar.decryptor
1692 [{ "curr_vol_no" : None
1696 , "path" : backup_path
1699 , "last_itelement" : None
1701 , "new_volume_handler" :
1702 partial(self.new_volume_handler,
1703 self._deltatar, self._cwd, True,
1704 os.path.dirname(backup_path), decryptor)
1705 , "decryptor" : decryptor
1707 elif index_list is not None:
1708 for index in index_list:
# The last entry of index_list is the full backup; the rest are diffs.
1709 is_full = index == index_list[-1]
1712 if self._password is not None:
1713 decryptor = crypto.Decrypt (password=self._password,
1714 key=self._crypto_key,
1715 strict_ivs=strict_validation)
1717 # make paths absolute to avoid cwd problems
1718 if not os.path.isabs(index):
1719 index = os.path.normpath(os.path.join(cwd, index))
1729 last_itelement = None,
1731 new_volume_handler = partial(self.new_volume_handler,
1732 self._deltatar, self._cwd, is_full,
1733 os.path.dirname(index), decryptor),
1734 decryptor = decryptor
1736 self._data.append(s)
1738 # make paths absolute to avoid cwd problems
1739 if not os.path.isabs(backup_path):
1740 backup_path = os.path.normpath(os.path.join(cwd, backup_path))
1742 # update the new_volume_handler of tar_obj
1743 tarobj.new_volume_handler = partial(self.new_volume_handler,
1744 self._deltatar, self._cwd, True, os.path.dirname(backup_path),
1745 self._deltatar.decryptor)
1754 last_itelement = None,
1756 new_volume_handler = tarobj.new_volume_handler,
1757 decryptor = self._deltatar.decryptor
1759 self._data.append(s)
# Release every open volume fd and tar object held in _data.
1764 Closes all open files
1766 for data in self._data:
1768 data['vol_fd'].close()
1769 data['vol_fd'] = None
1771 data['tarobj'].close()
1772 data['tarobj'] = None
# Remove a path from the target tree while keeping its parent directory's
# mtime unchanged.
1774 def delete(self, path):
1778 if not os.path.exists(path):
1781 # to preserve parent directory mtime, we save it
1782 parent_dir = os.path.dirname(path) or os.getcwd()
1783 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
# Directories and non-directories are removed differently (the removal
# calls themselves sit on lines missing from this excerpt).
1785 if os.path.isdir(path) and not os.path.islink(path):
1790 # now we restore parent_directory mtime
1791 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1793 def restore(self, itpath, l_no, callback=None):
1795 Restore the path from the appropriate backup. Receives the current path
1796 from the newest (=first) index iterator. itpath must be not null.
1797 callback is a custom function that gets called for every file.
1799 NB: This function takes the attribute ``_data`` as input but will only
1800 ever use its first and, if available, second element. Anything else in
1801 ``._data[]`` will be ignored.
1803 path = itpath['path']
1805 # Calls the callback function
1809 if path.startswith('delete://'):
1810 # the file has previously been deleted already in restore_backup in
1811 # all cases so we just need to finish
1814 # get data from newest index (_data[0])
1815 data = self._data[0]
1816 upath = self._deltatar.unprefixed(path)
1818 # to preserve parent directory mtime, we save it
1819 parent_dir = os.path.dirname(upath) or os.getcwd()
1820 os.makedirs(parent_dir, exist_ok=True)
1821 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1823 # if path is found in the newest index as to be snapshotted, deal with it
1825 if path.startswith('snapshot://'):
1826 self.restore_file(itpath, data, path, l_no, upath)
1828 # now we restore parent_directory mtime
1829 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1832 # we go from index to index, finding the path in the index, then finding
1833 # the index with the most recent snapshot of the file being restored
1835 # Right now we support diff backups, only. No incremental backups.
1836 # As a result _data[0] is always the diff backup index
1837 # and _data[1] the full backup index.
1838 if len(self._data) == 2:
1839 data = self._data[1]
1840 d, l_no, dpath = self.find_path_in_index(data, upath)
1842 self._deltatar.logger.warning('Error restoring file %s from '
1843 'index, not found in index %s' % (path, data['path']))
1846 cur_path = d.get('path', '')
1847 if cur_path.startswith('delete://'):
1848 self._deltatar.logger.warning(('Strange thing happened, file '
1849 '%s was listed in first index but deleted by another '
1850 'one. Path was ignored and untouched.') % path)
1852 elif cur_path.startswith('snapshot://'):
1853 # this code path is reached when the file is unchanged
1854 # in the newest index and therefore of type 'list://'
1855 self.restore_file(d, data, path, l_no, dpath)
1857 # now we restore parent_directory mtime
1858 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1861 # error code path is reached when:
1862 # a) we have more than two indexes (unsupported atm)
1863 # b) both indexes contain a list:// entry (logic error)
1864 # c) we have just one index and it also contains list://
1865 self._deltatar.logger.warning(('Error restoring file %s from index, '
1866 'snapshot not found in any index') % path)
# Scan an index (resuming from where the previous scan stopped) for the
# entry matching upath; returns (entry, line_no, unprefixed_path).
1868 def find_path_in_index(self, data, upath):
1869 # NOTE: we restart the iterator sometimes because the iterator can be
1870 # walked over completely multiple times, for example if one path if not
1871 # found in one index and we have to go to the next index.
1872 it = data['iterator']
1874 it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
1875 d, l_no = it.__next__()
# Resume from the element remembered by the previous call, if any.
1877 d = data['last_itelement']
1878 l_no = data['last_lno']
1881 dpath = self._deltatar.unprefixed(d.get('path', ''))
1883 data['last_itelement'] = d
1884 data['last_lno'] = l_no
1885 return d, l_no, dpath
1887 up, dp = self._deltatar.compare_indexes(upath, dpath)
1888 # any time upath should have appeared before current dpath, it means
1889 # upath is just not in this index and we should stop
1891 data['last_itelement'] = d
1892 data['last_lno'] = l_no
1896 d, l_no = it.__next__()
1897 except StopIteration:
1898 data['last_itelement'] = d
1899 data['last_lno'] = l_no
# Apply mode/mtime (and, when root, ownership) to the deferred
# directories, deepest first so parent mtimes are not clobbered.
1902 def restore_directories_permissions(self):
1904 Restore directory permissions when everything have been restored
1911 self._directories.sort(key=operator.attrgetter('name'))
1912 self._directories.reverse()
1914 # Set correct owner, mtime and filemode on directories.
1915 for member in self._directories:
1916 dirpath = member.name
1918 os.chmod(dirpath, member.mode)
1919 os.utime(dirpath, (member.mtime, member.mtime))
1921 # We have to be root to do so.
# Resolve symbolic group/user names to numeric ids (fallback branches
# are on lines missing from this excerpt).
1923 g = grp.getgrnam(member.gname)[2]
1927 u = pwd.getpwnam(member.uname)[2]
1931 if member.issym and hasattr(os, "lchown"):
1932 os.lchown(dirpath, u, g)
1934 os.chown(dirpath, u, g)
1935 except EnvironmentError:
1936 raise tarfile.ExtractError("could not change owner")
1938 except tarfile.ExtractError as e:
1939 self._deltatar.logger.warning('tarfile: %s' % e)
# Volume-change callback used by the tar objects owned by this helper
# (bound via functools.partial in __init__).
1942 def new_volume_handler(deltarobj, cwd, is_full, backup_path, decryptor, tarobj, base_name, volume_number):
1944 Set up a new volume and perform the tasks necessary for transitioning
1947 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1948 volume_number, guess_name=True)
1949 volume_path = os.path.join(backup_path, volume_name)
1951 # we convert relative paths into absolute because CWD is changed
1952 if not os.path.isabs(volume_path):
1953 volume_path = os.path.join(cwd, volume_path)
1955 tarobj.open_volume(volume_path, encryption=decryptor)
1957 def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
1959 Restores a snapshot of a file from a specific backup
1961 op_type = file_data.get('type', -1)
1962 member = file_data.get('member', None)
1963 ismember = bool(member)
1965 # when member is set, then we can assume everything is right and we
1966 # just have to restore the path
1968 vol_no = file_data.get('volume', -1)
# Sanity check: the index entry must name a valid volume number.
1970 if not isinstance(vol_no, int) or vol_no < 0:
1971 self._deltatar.logger.warning('unrecognized type to be restored: '
1972 '%s, line %d' % (op_type, l_no))
1974 # setup the volume that needs to be read. only needed when member is
1976 if index_data['curr_vol_no'] != vol_no:
1977 index_data['curr_vol_no'] = vol_no
1978 backup_path = os.path.dirname(index_data['path'])
1979 vol_name = self._deltatar.volume_name_func(backup_path,
1980 index_data['is_full'], vol_no, guess_name=True)
1981 vol_path = os.path.join(backup_path, vol_name)
1982 if index_data['vol_fd']:
1983 index_data['vol_fd'].close()
1984 index_data['vol_fd'] = open(vol_path, 'rb')
1986 # force reopen of the tarobj because of new volume
1987 if index_data['tarobj']:
1988 index_data['tarobj'].close()
1989 index_data['tarobj'] = None
1991 # seek tarfile if needed
1992 offset = file_data.get('offset', -1)
1993 if index_data['tarobj']:
# In rescue mode never trust the current reader position.
1994 if self._disaster == tarfile.TOLERANCE_RESCUE:
1995 # force a seek and reopen
1996 index_data['tarobj'].close()
1997 index_data['tarobj'] = None
# Otherwise peek at the next member; decryption/compression errors and
# path mismatches all fall back to a seek-and-reopen below.
2000 member = index_data['tarobj'].__iter__().__next__()
2001 except tarfile.DecryptionError:
2003 except tarfile.CompressionError:
2006 if not member or member.path != file_data['path']:
2007 # force a seek and reopen
2008 index_data['tarobj'].close()
2009 index_data['tarobj'] = None
2012 # open the tarfile if needed
2013 if not index_data['tarobj']:
2014 index_data['vol_fd'].seek(offset)
2015 index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
2016 fileobj=index_data['vol_fd'],
2017 format=tarfile.GNU_FORMAT,
2018 concat='#' in self._deltatar.mode,
2019 encryption=index_data["decryptor"],
2020 new_volume_handler=index_data['new_volume_handler'],
2021 save_to_members=False,
2022 tolerance=self._disaster)
2024 member = index_data['tarobj'].__iter__().__next__()
# Extract under the unprefixed (target-relative) name.
2026 member.path = unprefixed_path
2027 member.name = unprefixed_path
2029 if op_type == 'directory':
2030 self.add_member_dir(member)
# Work on a copy so the deferred-permissions record keeps the real mode;
# extract with owner-only access until permissions are restored at the end.
2031 member = copy.copy(member)
2032 member.mode = 0o0700
2034 # if it's an existing directory, we then don't need to recreate it
2035 # just set the right permissions, mtime and that kind of stuff
2036 if os.path.exists(member.path):
2040 # set current volume number in tarobj, otherwise the extraction of the
2041 # file might fail when trying to extract a multivolume member
2042 index_data['tarobj'].volume_number = index_data['curr_vol_no']
# Symlinks are skipped (with a warning) rather than extracted.
2044 def ignore_symlink (member, *_args):
2045 self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
2047 # finally, restore the file
2048 index_data['tarobj'].extract(member, symlink_cb=ignore_symlink,
# Record a directory whose metadata must be applied after all contents
# have been restored; ownership fields only when we can actually chown.
2051 def add_member_dir(self, member):
2053 Add member dir to be restored at the end
2055 if not self.canchown:
2056 self._directories.append(DirItem(name=member.name, mode=member.mode,
2057 mtime=member.mtime))
2059 self._directories.append(DirItem(name=member.name, mode=member.mode,
2060 mtime=member.mtime, gname=member.gname, uname=member.uname,
2061 uid=member.uid, gid=member.gid, issym=member.issym()))
2063 class DirItem(object):
2064 def __init__(self, **kwargs):
2065 for k, v in kwargs.items():