3 # Copyright (C) 2013, 2014 Intra2net AG
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published
7 # by the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see
17 # <http://www.gnu.org/licenses/lgpl-3.0.html>
19 # Author: Eduardo Robles Elvira <edulix@wadobo.com>
# Version of the PDTCRYPT header format and of the encryption parameter set
# written when creating new backups (see crypto usage below).
21 DELTATAR_HEADER_VERSION = 1
22 DELTATAR_PARAMETER_VERSION = 1
35 from functools import partial
# Logging handler that discards every record.  Attached to the library logger
# below so applications that do not configure logging are not spammed with
# "no handlers could be found" warnings.
# NOTE(review): the emit() body is elided in this excerpt -- presumably a
# no-op; confirm against the full file.
40 class NullHandler(logging.Handler):
41 def emit(self, record):
45 logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
53 # encryption direction
54 CRYPTO_MODE_ENCRYPT = 0
55 CRYPTO_MODE_DECRYPT = 1
57 # The canonical extension for encrypted backup files regardless of the actual
58 # encryption parameters is “.pdtcrypt”. This is analogous to the encryption
59 # header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
60 # Since the introduction of the versioned header there is no longer any need
61 # for encoding encryption parameters in the file extensions (“.aes128” and
63 PDTCRYPT_EXTENSION = "pdtcrypt"
# Discriminators for the two kinds of auxiliary (non-archive) files handled
# by open_auxiliary_file(): the index file and the info file.  Each kind maps
# to its own fixed AES-GCM IV counter value.
67 AUXILIARY_FILE_INDEX = 0
68 AUXILIARY_FILE_INFO = 1
70 class DeltaTar(object):
72     Backup class used to create backups
75 # list of files to exclude in the backup creation or restore operation. It
76 # can contain python regular expressions.
79 # list of files to include in the backup creation or restore operation. It
80 # can contain python regular expressions. If empty, all files in the source
81 # path will be backed up (when creating a backup) or all the files in the
82 # backup will be restored (when restoring a backup), but if included_files
83 # is set then only the files included in the list will be processed.
86 # custom filter of files to be backed up (or restored). Unused and unset
87 # by default. The function receives a file path and must return a boolean.
90 # mode in which the delta will be created (when creating a backup) or
91 # opened (when restoring). Accepts modes analog to the tarfile library.
94 # used together with aes modes to encrypt and decrypt backups.
99 # parameter version to use when encrypting; note that this has no effect
100 # on decryption since the required settings are determined from the headers
101 crypto_version = DELTATAR_HEADER_VERSION
102 crypto_paramversion = None
104 # when encrypting or decrypting, these hold crypto handlers; created before
105 # establishing the Tarfile stream iff a password is supplied.
109 # python logger object.
112 # specifies the index mode in the same format as @param mode, but without
113 # the ':', '|' or '#' at the beginning. It doesn't make sense to specify
114 # that the index is encrypted if no password is given in the constructor.
117 # current time for this backup. Used for file names and file creation checks
120 # extra data to be included in the header of the index file when creating a
124 # valid tarfile modes and their corresponding default file extension
125 __file_extensions_dict = {
134 '#gz.pdtcrypt': '.gz',
139 # valid index modes and their corresponding default file extension
140 __index_extensions_dict = {
144 'gz.pdtcrypt': '.gz',
148 # valid path prefixes
# NOTE(review): entries elided here -- elsewhere in the file the prefixes
# u'snapshot://', u'list://' and u'delete://' are used; confirm this list
# matches them.
149 __path_prefix_list = [
# NOTE(review): excluded_files/included_files use mutable list defaults; the
# shared-list pitfall is mostly harmless here because they are only read, but
# a None default would be safer -- consider fixing in the full file.
155 def __init__(self, excluded_files=[], included_files=[],
156 filter_func=None, mode="", password=None,
157 crypto_key=None, nacl=None,
158 crypto_version=DELTATAR_HEADER_VERSION,
159 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
160 logger=None, index_mode=None, index_name_func=None,
161 volume_name_func=None):
163 Constructor. Configures the diff engine.
166 - excluded_files: list of files to exclude in the backup creation or
167 restore operation. It can contain python regular expressions.
169 - included_files: list of files to include in the backup creation or
170 restore operation. It can contain python regular expressions. If
171 empty, all files in the source path will be backed up (when creating a
172 backup) or all the files in the backup will be restored (when
173 restoring a backup), but if included_files is set then only the files
174 included in the list will be processed.
176 - filter_func: custom filter of files to be backed up (or restored).
177 Unused and unset by default. The function receives a file path and
178 must return a boolean.
180 - mode: mode in which the delta will be created (when creating a backup)
181 or opened (when restoring). Accepts the same modes as the tarfile
182 library. Valid modes are:
185 ':' open uncompressed
186 ':gz' open with gzip compression
187 ':bz2' open with bzip2 compression
188 '|' open an uncompressed stream of tar blocks
189 '|gz' open a gzip compressed stream of tar blocks
190 '|bz2' open a bzip2 compressed stream of tar blocks
191 '#gz' open a stream of gzip compressed tar blocks
193 - crypto_key: used to encrypt and decrypt backups. Encryption will
194 be enabled automatically if a key is supplied. Requires a salt to be
197 - nacl: salt that was used to derive the encryption key for embedding
198 in the PDTCRYPT header. Not needed when decrypting and when
199 encrypting with password.
201 - password: used to encrypt and decrypt backups. Encryption will be
202 enabled automatically if a password is supplied.
204 - crypto_version: version of the format, determining the kind of PDT
207 - crypto_paramversion: optionally request encryption conforming to
208 a specific parameter version. Defaults to the standard PDT value
209 which as of 2017 is the only one available.
211 - logger: python logger object. Optional.
213 - index_mode: specifies the index mode in the same format as @param
214 mode, but without the ':', '|' or '#' at the beginning. If encryption
215 is requested it will extend to the auxiliary (index, info) files as
216 well. This is an optional parameter that will automatically mimic
217 @param mode by default if not provided. Valid modes are:
220 'gz' open with gzip compression
221 'bz2' open with bzip2 compression
223 - index_name_func: function that sets a custom name for the index file.
224 This function receives a flag to indicate whether the name will be
225 used for a full or diff backup. The backup path will be prepended to
228 - volume_name_func: function that defines the name of tar volumes. It
229 receives the backup_path, if it's a full backup and the volume number,
230 and must return the name for the corresponding volume name. Optional,
231 DeltaTar has default names for tar volumes.
# Reject tarfile modes we do not know how to name files for.
234 if mode not in self.__file_extensions_dict:
235 raise Exception('Unrecognized extension mode=[%s] requested for files'
238 self.excluded_files = excluded_files
239 self.included_files = included_files
240 self.filter_func = filter_func
241 self.logger = logging.getLogger('deltatar.DeltaTar')
# NOTE(review): despite its name, the `logger` parameter is installed as a
# *handler* on the library logger here -- callers must pass a
# logging.Handler, not a Logger; confirm documented intent in the full file.
243 self.logger.addHandler(logger)
246 if crypto_key is not None:
247 self.crypto_key = crypto_key
248 self.nacl = nacl # encryption only
250 if password is not None:
251 self.password = password
253 if crypto_version is not None:
254 self.crypto_version = crypto_version
256 if crypto_paramversion is not None:
257 self.crypto_paramversion = crypto_paramversion
259 # generate index_mode
# Default the index mode from @param mode (elided branch), otherwise
# validate the explicitly supplied one.
260 if index_mode is None:
266 elif mode not in self.__index_extensions_dict:
267 raise Exception('Unrecognized extension mode=[%s] requested for index'
270 self.index_mode = index_mode
271 self.current_time = datetime.datetime.now()
# Custom naming callbacks override the default method-level implementations
# (index_name_func / volume_name_func defined below on the class).
273 if index_name_func is not None:
274 self.index_name_func = index_name_func
276 if volume_name_func is not None:
277 self.volume_name_func = volume_name_func
# Pick the file extension for an archive or index file, honoring the current
# mode and appending ".pdtcrypt" when encryption is active.
279 def pick_extension(self, kind, mode=None):
281 Choose the extension depending on a) the kind of file given, b) the
282 processing mode, and c) the current encryption settings.
285 if kind == PDT_TYPE_ARCHIVE:
288 mode = self.__index_extensions_dict [self.index_mode]
# Encrypted output (key or password configured) gets the canonical
# ".pdtcrypt" suffix appended on top of the compression extension.
290 if self.crypto_key is not None or self.password is not None:
291 ret += "." + PDTCRYPT_EXTENSION
# Default index-file naming: "bfull-<date>.index<ext>" or
# "bdiff-<date>.index<ext>", using the backup's current_time timestamp.
294 def index_name_func(self, is_full): # pylint: disable=method-hidden
296 Callback for setting a custom name for the index file. Depending on
297 whether *is_full* is set, it will create a suitable name for a full
300 prefix = "bfull" if is_full else "bdiff"
301 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
302 extension = self.pick_extension \
304 self.__index_extensions_dict [self.index_mode])
306 return "%s-%s.index%s" % (prefix, date_str, extension)
# Default tar-volume naming: "bfull-<date>-NNN<ext>" / "bdiff-<date>-NNN<ext>"
# with a 1-based, zero-padded volume number.
308 def volume_name_func(self, backup_path, # pylint: disable=method-hidden
309 is_full, volume_number,
312 function that defines the name of tar volumes. It receives the
313 backup_path, if it's a full backup and the volume number, and must return
314 the name for the corresponding volume name. Optional, DeltaTar has default
315 names for tar volumes.
317 If guess_name is activated, the file is intended not to be created but
318 to be found, and thus the date will be guessed.
320 prefix = "bfull" if is_full else "bdiff"
321 extension = self.pick_extension \
323 self.__file_extensions_dict [self.mode])
326 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
327 return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
# guess_name path: scan the backup directory for an existing file matching
# the prefix/postfix pattern instead of stamping a new date.
329 prefix = prefix + "-"
330 postfix = "-%03d%s" % (volume_number + 1, extension)
331 for f in os.listdir(backup_path):
332 if f.startswith(prefix) and f.endswith(postfix):
334 raise Exception("volume not found")
337 def filter_path(self, path, source_path="", is_dir=None):
339 Filters a path, given the source_path, using the filtering properties
340 set in the constructor.
341 The filtering order is:
342 1. included_files (if any)
344 3. filter_func (which must return whether the file is accepted or not)
# Strip the source prefix so include/exclude patterns match relative paths.
347 if len(source_path) > 0:
348 # ensure that exactly one '/' at end of dir is also removed
349 source_path = source_path.rstrip(os.sep) + os.sep
350 path = path[len(source_path):]
352 # 1. filter included_files
354 if len(self.included_files) > 0:
356 for i in self.included_files:
357 # it can be either a regexp or a string
358 if isinstance(i, str):
359 # if the string matches, then continue
364 # if the string ends with / it's a directory, and if the
365 # path is contained in it, it is included
366 if i.endswith('/') and path.startswith(i):
370 # if the string doesn't end with /, add it and do the same
372 elif path.startswith(i + '/'):
376 # check for PARENT_MATCH
379 if not dir_path.endswith('/'):
382 if i.startswith(dir_path):
385 # if it's a reg exp, then we just check if it matches
# NOTE(review): re._pattern_type is a private name removed in Python 3.7+
# (use re.Pattern) -- confirm against the project's supported versions.
386 elif isinstance(i, re._pattern_type):
391 self.logger.warning('Invalid pattern in included_files: %s' % str(i))
393 if match == NO_MATCH:
396 # when a directory is in PARENT_MATCH, it doesn't matter if it's
397 # excluded. Its subfiles will be excluded, but the directory itself
# 2. filter excluded_files (skipped for PARENT_MATCH directories, see above)
399 if match != PARENT_MATCH:
400 for e in self.excluded_files:
401 # it can be either a regexp or a string
402 if isinstance(e, str):
403 # if the string matches, then exclude
407 # if the string ends with / it's a directory, and if the
408 # path starts with the directory, then exclude
409 if e.endswith('/') and path.startswith(e):
412 # if the string doesn't end with /, do the same check with
414 elif path.startswith(e + '/'):
417 # if it's a reg exp, then we just check if it matches
418 elif isinstance(e, re._pattern_type):
422 self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
# 3. user-supplied filter_func gets the final say.
425 return self.filter_func(path)
# Breadth-first directory walk honoring filter_path(); yields paths relative
# to source_path (unless keep_base_dir -- branch elided in this excerpt).
429 def _recursive_walk_dir(self, source_path, keep_base_dir=False):
431 Walk a directory recursively, yielding each file/directory
434 source_path = source_path.rstrip(os.sep)
439 beginning_size = len(source_path) + 1 # +1 for os.sep
# FIFO queue of directories still to visit (BFS order).
441 queue = [source_path]
444 cur_path = queue.pop(0)
446 # it might have been removed in the mean time
447 if not os.path.exists(cur_path):
# Sorted listing keeps iteration order deterministic, which the index
# collation in create_diff_backup relies on.
450 for filename in sorted(os.listdir(cur_path)):
451 child = os.path.join(cur_path, filename)
452 is_dir = os.path.isdir(child)
453 status = self.filter_path(child, source_path, is_dir)
454 if status == NO_MATCH:
456 if not os.access(child, os.R_OK):
457 self.logger.warning('Error accessing possibly locked file %s' % child)
461 yield child[beginning_size:]
# Recurse into matching directories (PARENT_MATCH keeps descending so
# included children deeper down can still be found).
463 if is_dir and (status == MATCH or status == PARENT_MATCH):
# Build the per-file metadata dict stored in the index; the 'type' field
# (directory/file/link, assignments elided) is derived from st_mode.
466 def _stat_dict(self, path):
468 Returns a dict with the stat data used to compare files
470 stinfo = os.stat(path)
471 mode = stinfo.st_mode
474 if stat.S_ISDIR(mode):
476 elif stat.S_ISREG(mode):
478 elif stat.S_ISLNK(mode):
# Timestamps are truncated to whole seconds for stable comparisons.
485 u'mtime': int(stinfo.st_mtime),
486 u'ctime': int(stinfo.st_ctime),
487 u'uid': stinfo.st_uid,
488 u'gid': stinfo.st_gid,
489 u'inode': stinfo.st_ino,
490 u'size': stinfo.st_size
493 def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
495 Return if the dicts are equal in the stat keys
497 keys = [u'type', u'mode',u'size', u'mtime',
498 # not restored: u'inode', u'ctime'
501 # only if user is root, then also check gid/uid. otherwise do not check it,
502 # because tarfile can chown in case of being superuser only
504 # also, skip the check in rpmbuild since the sources end up with the
505 # uid:gid of the packager while the extracted files are 0:0.
506 if hasattr(os, "geteuid") and os.geteuid() == 0 \
507 and os.getenv ("RPMBUILD_OPTIONS") is None:
# Treat a missing dict on either side as inequality.
511 if (not d1 and d2 != None) or (d1 != None and not d2):
# Compare normalized (prefixed) paths; the -1/-2 sentinels force a mismatch
# when a 'path' key is absent.
514 if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
517 type = d1.get('type', '')
520 # size doesn't matter for directories
521 if type == 'directory' and key == 'size':
523 if d1.get(key, -1) != d2.get(key, -2):
# Normalize a path to carry a scheme prefix; unprefixed paths become
# snapshot:// paths, and list:// optionally folds into snapshot:// so that
# unchanged entries compare equal to snapshotted ones.
527 def prefixed(self, path, listsnapshot_equal=False):
529 if a path is not prefixed, return it prefixed
531 for prefix in self.__path_prefix_list:
532 if path.startswith(prefix):
533 if listsnapshot_equal and prefix == u'list://':
534 return u'snapshot://' + path[len(prefix):]
536 return u'snapshot://' + path
# Strip any known scheme prefix (snapshot://, list://, ...) from a path;
# paths without a recognized prefix are returned as-is (elided fallthrough).
538 def unprefixed(self, path):
540 remove a path prefix if any
542 for prefix in self.__path_prefix_list:
543 if path.startswith(prefix):
544 return path[len(prefix):]
# Create a crypto handler for the given direction, or None when neither a
# key nor a password is configured (i.e. encryption is disabled).
548 def initialize_encryption (self, mode):
549 password = self.password
550 key = self.crypto_key
553 if key is None and password is None:
555 if mode == CRYPTO_MODE_ENCRYPT:
556 return crypto.Encrypt (password=password,
559 version=self.crypto_version,
560 paramversion=self.crypto_paramversion)
561 if mode == CRYPTO_MODE_DECRYPT:
562 return crypto.Decrypt (password=password, key=key)
# Unreachable for the two valid modes above; guards against caller bugs.
564 raise Exception ("invalid encryption mode [%r]" % mode)
567 def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
569 Given the specified configuration, opens a file for reading or writing,
570 inheriting the encryption and compression settings from the backup.
571 Returns a file object ready to use.
573 :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
576 :param kind: Role of the file, see AUXILIARY_FILE_* constants.
577 Both the info and the auxiliary file have a globally
578 unique, constant counter value.
# Derive the _Stream comptype from the configured index mode (gz/bz2/plain).
581 if self.index_mode.startswith('gz'):
583 elif self.index_mode.startswith('bz2'):
# Writing encrypts, reading decrypts; crypto_ctx stays None when no
# key/password is configured (see initialize_encryption).
591 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
593 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
595 if crypto_ctx is not None:
# Each auxiliary file kind uses its own fixed AES-GCM IV counter so index
# and info files never share an IV sequence.
596 if kind == AUXILIARY_FILE_INFO:
597 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
598 elif kind == AUXILIARY_FILE_INDEX:
599 enccounter = crypto.AES_GCM_IV_CNT_INDEX
601 raise Exception ("invalid kind of aux file %r" % kind)
# NOTE(review): relies on the project's patched tarfile._Stream accepting
# encryption/enccounter kwargs -- not the stdlib tarfile.
603 sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
604 bufsize=tarfile.RECORDSIZE, fileobj=None,
605 encryption=crypto_ctx, enccounter=enccounter)
610 def create_full_backup(self, source_path, backup_path,
611 max_volume_size=None, extra_data=dict()):
613 Creates a full backup.
616 - source_path: source path to the directory to back up.
617 - backup_path: path where the back up will be stored. Backup path will
618 be created if not existent.
619 - max_volume_size: maximum volume size in megabytes. Used to split the
620 backup in volumes. Optional (won't split in volumes by default).
621 - extra_data: a json-serializable dictionary with information that you
622 want to be included in the header of the index file
# ---- input validation -------------------------------------------------
625 if not isinstance(source_path, str):
626 raise Exception('Source path must be a string')
628 if not isinstance(backup_path, str):
629 raise Exception('Backup path must be a string')
631 if not os.path.exists(source_path) or not os.path.isdir(source_path):
632 raise Exception('Source path "%s" does not exist or is not a '\
633 'directory' % source_path)
635 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
636 max_volume_size < 1):
637 raise Exception('max_volume_size must be a positive integer')
638 if max_volume_size != None:
# Convert the caller-facing megabyte figure to bytes for tarfile.
639 max_volume_size = max_volume_size*1024*1024
641 if not isinstance(extra_data, dict):
642 raise Exception('extra_data must be a dictionary')
# Serialize early so a non-serializable dict fails before any file is made.
645 extra_data_str = json.dumps(extra_data)
647 raise Exception('extra_data is not json-serializable')
649 if not os.access(source_path, os.R_OK):
650 raise Exception('Source path "%s" is not readable' % source_path)
652 # try to create backup path if needed
653 if not os.path.exists(backup_path):
654 os.makedirs(backup_path)
656 if not os.access(backup_path, os.W_OK):
657 raise Exception('Backup path "%s" is not writeable' % backup_path)
659 if source_path.endswith('/'):
660 source_path = source_path[:-1]
662 if backup_path.endswith('/'):
663 backup_path = backup_path[:-1]
665 # update current time
666 self.current_time = datetime.datetime.now()
668 if self.mode not in self.__file_extensions_dict:
669 raise Exception('Unrecognized extension')
671 # setup for encrypting payload
672 if self.encryptor is None:
673 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
675 # some initialization
678 # generate the first volume name
679 vol_name = self.volume_name_func(backup_path, True, 0)
680 tarfile_path = os.path.join(backup_path, vol_name)
# Open the (possibly compressed/encrypted) index sink for writing.
683 index_name = self.index_name_func(True)
684 index_path = os.path.join(backup_path, index_name)
685 index_sink = self.open_auxiliary_file(index_path, 'w')
# Callback invoked by tarfile when max_volume_size is reached: close the
# current volume's file object and open the next numbered volume.
689 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
691 Handles the new volumes
693 volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
694 volume_path = os.path.join(backup_path, volume_name)
695 deltarobj.vol_no = volume_number
697 # we convert relative paths into absolute because CWD is changed
698 if not os.path.isabs(volume_path):
699 volume_path = os.path.join(cwd, volume_path)
701 if tarobj.fileobj is not None:
702 tarobj.fileobj.close()
704 deltarobj.logger.debug("opening volume %s" % volume_path)
706 tarobj.open_volume(volume_path, encryption=encryption)
708 # wraps some args from context into the handler
709 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
# Index header line plus BEGIN marker; a running CRC32 over every index
# line is written as a trailing checksum record at the end.
711 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
713 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
714 # calculate checksum and write into the stream
715 crc = binascii.crc32(s) & 0xFFFFffff
718 # start creating the tarfile
719 tarobj = tarfile.TarFile.open(tarfile_path,
720 mode='w' + self.mode,
721 format=tarfile.GNU_FORMAT,
722 concat='#' in self.mode,
723 encryption=self.encryptor,
724 max_volume_size=max_volume_size,
725 new_volume_handler=new_volume_handler,
726 save_to_members=False,
# chdir so archive members are stored relative to the source root.
728 os.chdir(source_path)
730 # for each file to be in the backup, do:
731 for path in self._recursive_walk_dir('.'):
732 # calculate stat dict for current file
733 statd = self._stat_dict(path)
734 statd['path'] = u'snapshot://' + statd['path']
735 statd['volume'] = self.vol_no
738 tarobj.add(path, arcname = statd['path'], recursive=False)
740 # retrieve file offset
741 statd['offset'] = tarobj.get_last_member_offset()
742 self.logger.debug("backup %s" % statd['path'])
744 # store the stat dict in the index
745 s = bytes(json.dumps(statd) + '\n', 'UTF-8')
746 crc = binascii.crc32(s, crc) & 0xffffffff
749 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
750 crc = binascii.crc32(s, crc) & 0xffffffff
752 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
757 index_sink.close (close_fileobj=True)
759 def create_diff_backup(self, source_path, backup_path, previous_index_path,
760 max_volume_size=None, extra_data=dict()):
765 - source_path: source path to the directory to back up.
766 - backup_path: path where the back up will be stored. Backup path will
767 be created if not existent.
768 - previous_index_path: index of the previous backup, needed to know
769 which files changed since then.
770 - max_volume_size: maximum volume size in megabytes (MB). Used to split
771 the backup in volumes. Optional (won't split in volumes by default).
773 NOTE: previous index is assumed to follow exactly the same format as
774 the index_mode setup in the constructor.
776 # check/sanitize input
777 if not isinstance(source_path, str):
778 raise Exception('Source path must be a string')
780 if not isinstance(backup_path, str):
781 raise Exception('Backup path must be a string')
783 if not os.path.exists(source_path) or not os.path.isdir(source_path):
784 raise Exception('Source path "%s" does not exist or is not a '\
785 'directory' % source_path)
787 if not isinstance(extra_data, dict):
788 raise Exception('extra_data must be a dictionary')
791 extra_data_str = json.dumps(extra_data)
793 raise Exception('extra_data is not json-serializable')
795 if not os.access(source_path, os.R_OK):
796 raise Exception('Source path "%s" is not readable' % source_path)
798 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
799 max_volume_size < 1):
800 raise Exception('max_volume_size must be a positive integer')
801 if max_volume_size != None:
# Megabytes -> bytes, mirroring create_full_backup.
802 max_volume_size = max_volume_size*1024*1024
804 if not isinstance(previous_index_path, str):
# NOTE(review): error message has a typo ("A string") -- runtime string,
# left untouched here.
805 raise Exception('previous_index_path must be A string')
807 if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
808 raise Exception('Index path "%s" does not exist or is not a '\
809 'file' % previous_index_path)
811 if not os.access(previous_index_path, os.R_OK):
812 raise Exception('Index path "%s" is not readable' % previous_index_path)
814 # try to create backup path if needed
815 if not os.path.exists(backup_path):
816 os.makedirs(backup_path)
818 if not os.access(backup_path, os.W_OK):
819 raise Exception('Backup path "%s" is not writeable' % backup_path)
821 if source_path.endswith('/'):
822 source_path = source_path[:-1]
824 if backup_path.endswith('/'):
825 backup_path = backup_path[:-1]
827 # update current time
828 self.current_time = datetime.datetime.now()
830 if self.mode not in self.__file_extensions_dict:
831 raise Exception('Unrecognized extension')
833 # setup for encrypting payload
834 if self.encryptor is None:
835 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
837 # some initialization
840 # generate the first volume name
841 vol_name = self.volume_name_func(backup_path, is_full=False,
843 tarfile_path = os.path.join(backup_path, vol_name)
848 index_name = self.index_name_func(is_full=False)
849 index_path = os.path.join(backup_path, index_name)
850 index_sink = self.open_auxiliary_file(index_path, 'w')
# Volume-rotation callback, analogous to create_full_backup's handler.
# NOTE(review): unlike the full-backup handler, this one neither closes the
# previous fileobj nor passes encryption to open_volume -- confirm this
# asymmetry is intended in the full file.
852 def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
854 Handles the new volumes
856 volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
857 volume_number=volume_number)
858 volume_path = os.path.join(backup_path, volume_name)
859 deltarobj.vol_no = volume_number
861 # we convert relative paths into absolute because CWD is changed
862 if not os.path.isabs(volume_path):
863 volume_path = os.path.join(cwd, volume_path)
865 deltarobj.logger.debug("opening volume %s" % volume_path)
866 tarobj.open_volume(volume_path)
868 # wraps some args from context into the handler
869 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
# Index header + BEGIN marker with a running CRC32, as in the full backup.
871 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
873 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
874 # calculate checksum and write into the stream
875 crc = binascii.crc32(s) & 0xFFFFffff
878 # start creating the tarfile
879 tarobj = tarfile.TarFile.open(tarfile_path,
880 mode='w' + self.mode,
881 format=tarfile.GNU_FORMAT,
882 concat='#' in self.mode,
883 encryption=self.encryptor,
884 max_volume_size=max_volume_size,
885 new_volume_handler=new_volume_handler,
886 save_to_members=False,
890 # create the iterators, first the previous index iterator, then the
891 # source path directory iterator and collate and iterate them
892 if not os.path.isabs(previous_index_path):
893 previous_index_path = os.path.join(cwd, previous_index_path)
894 index_it = self.iterate_index_path(previous_index_path)
896 os.chdir(source_path)
897 dir_it = self._recursive_walk_dir('.')
898 dir_path_it = self.jsonize_path_iterator(dir_it)
906 # for each file to be in the backup, do:
# Collation pairs old-index entries with current directory entries; each
# pair decides one of three actions: snapshot / delete / list (unchanged).
907 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
909 # if file is not in the index, it means it's a new file, so we have
914 # if the file is not in the directory iterator, it means that it has
915 # been deleted, so we need to mark it as such
918 # if the file is in both iterators, it means it might have either
919 # not changed (in which case we will just list it in our index but
920 # it will not be included in the tar file), or it might have
921 # changed, in which case we will snapshot it.
922 elif ipath and dpath:
923 if self._equal_stat_dicts(ipath, dpath):
927 # TODO: when creating chained backups (i.e. diffing from another
928 # diff), we will need to detect the type of action in the previous
929 # index, because if it was delete and dpath is None, we should
932 if action == 'snapshot':
933 # calculate stat dict for current file
935 stat['path'] = "snapshot://" + dpath['path']
936 stat['volume'] = self.vol_no
938 self.logger.debug("[STORE] %s" % dpath['path'])
941 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
943 # retrieve file offset
944 stat['offset'] = tarobj.get_last_member_offset()
945 elif action == 'delete':
946 path = self.unprefixed(ipath['path'])
948 u'path': u'delete://' + path,
949 u'type': ipath['type']
951 self.logger.debug("[DELETE] %s" % path)
953 # mark it as deleted in the backup
# /dev/null serves as an empty placeholder member for the deletion marker.
954 tarobj.add("/dev/null", arcname=stat['path'])
955 elif action == 'list':
957 path = self.unprefixed(ipath['path'])
958 stat['path'] = u'list://' + path
959 # unchanged files do not enter in the backup, only in the index
960 self.logger.debug("[UNCHANGED] %s" % path)
963 self.logger.warning('unknown action in create_diff_backup: {0}'
968 # store the stat dict in the index
969 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
970 crc = binascii.crc32(s, crc) & 0xffffffff
973 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
974 crc = binascii.crc32(s, crc) & 0xffffffff
976 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
985 def iterate_index_path(self, index_path):
987 Returns an index iterator. Internally, it uses a classic iterator class.
988 We do that instead of just yielding so that the iterator object can have
989 an additional function to close the file descriptor that is opened in
# Iterator over the JSON-lines index file; validates the header, skips to
# BEGIN-FILE-LIST, then yields one parsed entry per __next__ call.
993 class IndexPathIterator(object):
994 def __init__(self, delta_tar, index_path):
995 self.delta_tar = delta_tar
996 self.index_path = index_path
998 self.extra_data = dict()
1008 def __enter__(self):
1010 Allows this iterator to be used with the "with" statement
1013 self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
1014 # check index header
1015 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1016 if j.get("type", '') != 'python-delta-tar-index' or\
1017 j.get('version', -1) != 1:
1018 raise Exception("invalid index file format: %s" % json.dumps(j))
1020 self.extra_data = j.get('extra_data', dict())
1022 # find BEGIN-FILE-LIST, ignore other headers
1024 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1025 if j.get('type', '') == 'BEGIN-FILE-LIST':
1029 def __exit__(self, type, value, tb):
1031 Allows this iterator to be used with the "with" statement
1038 # read each file in the index and process it to do the restore
1042 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1043 except Exception as e:
1048 op_type = j.get('type', '')
1050 # when we detect the end of the list, break the loop
1051 if op_type == 'END-FILE-LIST':
1057 if op_type not in ['directory', 'file', 'link']:
1058 self.delta_tar.logger.warning('unrecognized type to be '
1059 'restored: %s, line %d' % (op_type, l_no))
# Skip unrecognized entries by recursing to the next line.
1061 return self.__next__()
1065 return IndexPathIterator(self, index_path)
1067 def iterate_tar_path(self, tar_path, new_volume_handler=None):
1069 Returns a tar iterator that iterates jsonized member items that contain
1070 an additional "member" field, used by RestoreHelper.
1072 class TarPathIterator(object):
1073 def __init__(self, delta_tar, tar_path, new_volume_handler=None):
1074 self.delta_tar = delta_tar
1075 self.tar_path = tar_path
# last_member tracks the previously yielded member so multivolume
# duplicates of the same path can be detected (see __next__ below).
1077 self.last_member = None
1078 self.new_volume_handler = new_volume_handler
1086 self.tar_obj.close()
1088 def __enter__(self):
1090 Allows this iterator to be used with the "with" statement
1092 if self.tar_obj is None:
# Only a password-configured backup builds a decryptor here; the
# key, if any, is forwarded alongside it.
1094 if self.delta_tar.password is not None:
1095 decryptor = crypto.Decrypt \
1096 (password=self.delta_tar.password,
1097 key=self.delta_tar.crypto_key)
1098 self.tar_obj = tarfile.TarFile.open(self.tar_path,
1099 mode='r' + self.delta_tar.mode,
1100 format=tarfile.GNU_FORMAT,
1101 concat='#' in self.delta_tar.mode,
1102 encryption=decryptor,
1103 new_volume_handler=self.new_volume_handler,
1104 save_to_members=False,
1108 def __exit__(self, type, value, tb):
1110 Allows this iterator to be used with the "with" statement
1113 self.tar_obj.close()
1118 Read each member and return it as a stat dict
1120 tarinfo = self.tar_obj.__iter__().__next__()
1121 # NOTE: here we compare if tarinfo.path is the same as before
1122 # instead of comparing the tarinfo object itself because the
1123 # object itself might change for multivol tarinfos
1124 if tarinfo is None or (self.last_member is not None and\
1125 self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
1128 self.last_member = tarinfo
# Map the tarinfo kind to the index 'type' field (assignments elided).
1131 if tarinfo.isfile():
1133 elif tarinfo.isdir():
1135 elif tarinfo.islnk() or tarinfo.issym():
1140 u'path': tarinfo.path,
1141 u'mode': tarinfo.mode,
1142 u'mtime': tarinfo.mtime,
1143 u'ctime': -1, # cannot restore
1144 u'uid': tarinfo.uid,
1145 u'gid': tarinfo.gid,
1146 u'inode': -1, # cannot restore
1147 u'size': tarinfo.size,
1151 return TarPathIterator(self, tar_path, new_volume_handler)
# Wrap a path iterator (e.g. _recursive_walk_dir) so it yields
# (stat-dict, 0) pairs ready for collation against an index iterator.
# NOTE(review): `iter` shadows the builtin -- harmless here but worth
# renaming in the full file.
1153 def jsonize_path_iterator(self, iter, strip=0):
1155 converts the yielded items of an iterator into json path lines.
1157 strip: Strip the smallest prefix containing num leading slashes from
1162 path = iter.__next__()
1164 yield self._stat_dict(path), 0
1166 st = self._stat_dict(path)
# Drop the first `strip` path components before storing the path.
1167 st['path'] = "/".join(path.split("/")[strip:])
1169 except StopIteration:
1172 def iterate_disaster_index (self, index):
1174 Mimick the behavior of the other object iterators, just with the inputs
1175 supplied directly as *index*.
# Thin adapter exposing an in-memory index list through the same
# context-manager/iterator protocol as IndexPathIterator.
1178 class RawIndexIterator(object):
1179 def __init__(self, delta_tar, index):
1180 self.delta_tar = delta_tar
1190 def __enter__(self):
1192 Allows this iterator to be used with the "with" statement
1194 self.iter = self.index.__iter__ ()
1197 def __exit__(self, type, value, tb):
1199 Allows this iterator to be used with the "with" statement
1203 idxent = self.iter.__next__ ()
1206 return RawIndexIterator(self, index)
1208 def collate_iterators(self, it1, it2):
1210 Collate two iterators, so that it returns pairs of the items of each
1211 iterator (if the items are the same), or (None, elem2) or (elem1, None)
1212 when there's no match for the items in the other iterator.
1214 It assumes that the items in both lists are ordered in the same way.
1217 elem1, elem2 = None, None
# Advance it1; when it1 is exhausted, drain the rest of it2. Entries may
# be (elem, l_no) tuples or plain elems -- hence the isinstance checks.
1221 elem1, l_no = it1.__next__()
1222 except StopIteration:
1224 yield (None, elem2, l_no)
1226 if isinstance(elem2, tuple):
1228 yield (None, elem2, l_no)
# Advance it2; when it2 is exhausted, drain the rest of it1.
1232 elem2 = it2.__next__()
1233 if isinstance(elem2, tuple):
1235 except StopIteration:
1237 yield (elem1, None, l_no)
1238 for elem1, l_no in it1:
1239 yield (elem1, None, l_no)
# Both iterators yielded an element: order the pair by comparing the
# unprefixed paths (yield/advance decisions are elided in this listing).
1242 index1 = self.unprefixed(elem1['path'])
1243 index2 = self.unprefixed(elem2['path'])
1244 i1, i2 = self.compare_indexes(index1, index2)
1246 yield1 = yield2 = None
1253 yield (yield1, yield2, l_no)
1255 def compare_indexes(self, index1, index2):
1257 Compare iterator indexes and return a tuple in the following form:
1258 if index1 < index2, returns (index1, None)
1259 if index1 == index2 returns (index1, index2)
1260 else: returns (None, index2)
# Comparison is component-wise on '/'-split paths; the conditional guards
# in front of the early returns are elided from this sampled listing.
1262 l1 = index1.split('/')
1263 l2 = index2.split('/')
# Depth difference between the two paths (presumably used by the elided
# guards to order shorter vs. deeper paths -- TODO confirm).
1264 length = len(l2) - len(l1)
1267 return (index1, None)
1269 return (None, index2)
1271 for i1, i2 in zip(l1, l2):
1273 return (index1, None)
1275 return (None, index2)
# All compared components are equal: the indexes match.
1277 return (index1, index2)
# List the members of a backup tar, calling list_func (or, by default,
# logging each member's path) for every entry.
1279 def list_backup(self, backup_tar_path, list_func=None):
# -- input validation --
1280 if not isinstance(backup_tar_path, str):
1281 raise Exception('Backup tar path must be a string')
1283 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1284 raise Exception('Source path "%s" does not exist or is not a '\
1285 'file' % backup_tar_path)
1287 if not os.access(backup_tar_path, os.R_OK):
1288 raise Exception('Source path "%s" is not readable' % backup_tar_path)
# Local handler invoked by tarfile whenever a new volume must be opened.
1292 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
1294 Handles the new volumes
1296 volume_name = deltarobj.volume_name_func(backup_path, True,
1297 volume_number, guess_name=True)
1298 volume_path = os.path.join(backup_path, volume_name)
1300 # we convert relative paths into absolute because CWD is changed
1301 if not os.path.isabs(volume_path):
1302 volume_path = os.path.join(cwd, volume_path)
1303 tarobj.open_volume(volume_path, encryption=encryption)
# Lazily create the decryptor so repeated calls reuse the same object.
1305 if self.decryptor is None:
1306 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
1308 backup_path = os.path.dirname(backup_tar_path)
1309 if not os.path.isabs(backup_path):
1310 backup_path = os.path.join(cwd, backup_path)
# Pre-bind the fixed leading arguments; tarfile supplies the trailing
# (tarobj, base_name, volume_number) ones.
1311 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)
1313 tarobj = tarfile.TarFile.open(backup_tar_path,
1314 mode='r' + self.mode,
1315 format=tarfile.GNU_FORMAT,
1316 concat='#' in self.mode,
1317 encryption=self.decryptor,
1318 new_volume_handler=new_volume_handler,
1319 save_to_members=False,
# Per-member filter: with no list_func, just log the member path.
1322 def filter(cls, list_func, tarinfo):
1323 if list_func is None:
1324 self.logger.info(tarinfo.path)
1328 filter = partial(filter, self, list_func)
1330 tarobj.extractall(filter=filter)
# Main restore entry point: restores from an index chain, a raw tar, or a
# synthesized disaster index, reconciling the target directory against the
# backup contents. NOTE(review): mutable default [] for
# backup_indexes_paths is shared across calls; the visible code never
# mutates it, but this should be confirmed against the elided lines.
1333 def restore_backup(self, target_path, backup_indexes_paths=[],
1334 backup_tar_path=None, restore_callback=None,
1335 disaster=tarfile.TOLERANCE_STRICT, backup_index=None):
1340 - target_path: path to restore.
1341 - backup_indexes_paths: path to backup indexes, in descending date order.
1342 The indexes indicate the location of their respective backup volumes,
1343 and multiple indexes are needed to be able to restore diff backups.
1344 Note that this is an optional parameter: if not suplied, it will
1345 try to restore directly from backup_tar_path.
1346 - backup_tar_path: path to the backup tar file. Used as an alternative
1347 to backup_indexes_paths to restore directly from a tar file without
1348 using any file index. If it's a multivol tarfile, volume_name_func
1350 - restore_callback: callback function to be called during restore.
1351 This is passed to the helper and gets called for every file.
1353 NOTE: If you want to use an index to restore a backup, this function
1354 only supports to do so when the tarfile mode is either uncompressed or
1355 uses concat compress mode, because otherwise it would be very slow.
1357 NOTE: Indices are assumed to follow the same format as the index_mode
1358 specified in the constructor.
1360 Returns the list of files that could not be restored, if there were
1363 # check/sanitize input
1364 if not isinstance(target_path, str):
1365 raise Exception('Target path must be a string')
1367 if backup_indexes_paths is None and backup_tar_path == []:
1368 raise Exception("You have to either provide index paths or a tar path")
# Select the restore mode (assignments elided): a list backup_index means
# disaster mode, no indexes means tar mode, otherwise diff mode.
1370 if isinstance (backup_index, list) is True:
1372 elif len(backup_indexes_paths) == 0:
1378 if not isinstance(backup_tar_path, str):
1379 raise Exception('Backup tar path must be a string')
1381 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1382 raise Exception('Source path "%s" does not exist or is not a '\
1383 'file' % backup_tar_path)
1385 if not os.access(backup_tar_path, os.R_OK):
1386 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1388 if not isinstance(backup_indexes_paths, list):
1389 raise Exception('backup_indexes_paths must be a list')
1391 if self.mode.startswith(':') or self.mode.startswith('|'):
1392 raise Exception('Restore only supports either uncompressed tars'
1393 ' or concat compression when restoring from an index, and '
1394 ' the open mode you provided is "%s"' % self.mode)
1396 for index in backup_indexes_paths:
1397 if not isinstance(index, str):
1398 raise Exception('indices must be strings')
1400 if not os.path.exists(index) or not os.path.isfile(index):
1401 raise Exception('Index path "%s" does not exist or is not a '\
1404 if not os.access(index, os.R_OK):
1405 raise Exception('Index path "%s" is not readable' % index)
1407 # try to create backup path if needed
1408 if not os.path.exists(target_path):
1409 os.makedirs(target_path)
1411 # make backup_tar_path absolute so that iterate_tar_path works fine
1412 if backup_tar_path and not os.path.isabs(backup_tar_path):
1413 backup_tar_path = os.path.abspath(backup_tar_path)
# The restore works relative to the target directory.
1416 os.chdir(target_path)
1418 # setup for decrypting payload
1419 if self.decryptor is None:
1420 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
# -- per-mode setup of the index iterator and the RestoreHelper --
1423 index_it = self.iterate_tar_path(backup_tar_path)
1424 helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
1425 tarobj=index_it.tar_obj)
1426 elif mode == "diff":
1427 helper = RestoreHelper(self, cwd, backup_indexes_paths,
1430 # get iterator from newest index at _data[0]
1431 index1 = helper._data[0]["path"]
1432 index_it = self.iterate_index_path(index1)
1433 except tarfile.DecryptionError as exn:
1434 self.logger.error("failed to decrypt file [%s]: %s; is this an "
1435 "actual encrypted index file?"
1436 % (index1, str (exn)))
1437 return [(index1, exn)]
1438 except Exception as exn:
1440 self.logger.error("failed to read file [%s]: %s; is this an "
1441 "actual index file?" % (index1, str (exn)))
1442 return [(index1, exn)]
1443 elif mode == "disaster":
1444 index_it = self.iterate_disaster_index (backup_index)
1445 helper = RestoreHelper (self, cwd, backup_path=backup_tar_path,
1446 backup_index=backup_index,
# Walk the current target directory so it can be collated with the index.
1450 dir_it = self._recursive_walk_dir('.')
1451 dir_path_it = self.jsonize_path_iterator(dir_it)
1453 failed = [] # irrecoverable files
1455 # for each file to be restored, do:
1456 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
# Prefer the on-disk entry for path/type; fall back to the index entry.
1458 upath = dpath['path']
1459 op_type = dpath['type']
1461 upath = self.unprefixed(ipath['path'])
1462 op_type = ipath['type']
# Skip entries excluded by the include/exclude filters.
1465 if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
1468 # if types of the file mismatch, the file needs to be deleted
1470 if ipath is not None and dpath is not None and\
1471 dpath['type'] != ipath['type']:
1472 helper.delete(upath)
1474 # if file not found in dpath, we can directly restore from index
1476 # if the file doesn't exist and it needs to be deleted, it
1477 # means that work is already done
1478 if ipath['path'].startswith('delete://'):
1481 self.logger.debug("restore %s" % ipath['path'])
1482 helper.restore(ipath, l_no, restore_callback)
1483 except Exception as e:
1484 iipath = ipath.get ("path", "")
1485 self.logger.error("FAILED to restore: {} ({})"
# In recover/rescue tolerance, collect the failure and keep going;
# strict tolerance presumably re-raises (elided line).
1487 if disaster != tarfile.TOLERANCE_STRICT:
1488 failed.append ((iipath, e))
1491 # if both files are equal, we have nothing to restore
1492 if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
1495 # we have to restore the file, but first we need to delete the
1496 # current existing file.
1497 # we don't delete the file if it's a directory, because it might
1498 # just have changed mtime, so it's quite inefficient to remove
1501 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
1502 helper.delete(upath)
1503 self.logger.debug("restore %s" % ipath['path'])
1505 helper.restore(ipath, l_no, restore_callback)
1506 except Exception as e:
1507 if disaster == tarfile.TOLERANCE_STRICT:
1509 failed.append ((ipath.get ("path", ""), e))
1512 # if the file is not in the index (so it comes from the target
1513 # directory) then we have to delete it
1515 self.logger.debug("delete %s" % upath)
1516 helper.delete(upath)
# Directory metadata is applied last, after all contents exist.
1518 helper.restore_directories_permissions()
# Convenience wrapper: restore with TOLERANCE_RECOVER so bad files are
# reported instead of aborting the restore. NOTE(review): mutable default
# [] for backup_indexes_paths mirrors restore_backup's signature; it is
# only passed through here, never mutated.
1526 def recover_backup(self, target_path, backup_indexes_paths=[],
1527 restore_callback=None):
1529 Walk the index, extracting objects in disaster mode. Bad files are
1530 reported along with a reason.
1532 return self.restore_backup(target_path,
1533 backup_indexes_paths=backup_indexes_paths,
1534 disaster=tarfile.TOLERANCE_RECOVER)
# Most aggressive recovery: ignore the on-disk index entirely and rebuild
# one by scanning the (possibly corrupt) archive for header-like data.
1537 def rescue_backup(self, target_path, backup_tar_path,
1538 restore_callback=None):
1540 More aggressive “unfsck” mode: do not rely on the index data as the
1541 files may be corrupt; skim files for header-like information and
1542 attempt to retrieve the data.
# Synthesize an in-memory index from whatever headers are recognizable
# (one call argument is elided from this sampled listing).
1544 backup_index = tarfile.gen_rescue_index(backup_tar_path,
1546 password=self.password,
1547 key=self.crypto_key)
# Delegate to restore_backup in disaster mode with rescue tolerance.
1549 return self.restore_backup(target_path,
1550 backup_index=backup_index,
1551 backup_tar_path=backup_tar_path,
1552 disaster=tarfile.TOLERANCE_RESCUE)
# Parse one line of an index file as JSON, raising descriptive errors for
# the common failure modes (compressed input, binary input, bad JSON).
1555 def _parse_json_line(self, f, l_no):
1557 Read line from file like object and process it as JSON.
1562 j = json.loads(l.decode('UTF-8'))
# Undecodable bytes: distinguish "compressed file fed as index" from
# generally binary input via the gzip magic bytes.
1563 except UnicodeDecodeError as e:
1564 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1566 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1567 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1570 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1571 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
# Decoded fine but is not valid JSON.
1573 except ValueError as e:
1574 raise Exception("error parsing this json line "
1575 "(line number %d): %s" % (l_no, l))
1579 class RestoreHelper(object):
1581 Class used to help to restore files from indices
1584 # holds the dicts of data
1591 # list of directories to be restored. This is done as a last step, see
1592 # tarfile.extractall for details.
1595 _disaster = tarfile.TOLERANCE_STRICT
# Set up per-backup state dicts in self._data, one per index/tar, each
# carrying the open volume fd, tarobj, iterator bookkeeping, decryptor and
# new-volume handler used later by restore()/restore_file().
1597 def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
1598 backup_index=None, tarobj=None,
1599 disaster=tarfile.TOLERANCE_STRICT):
1601 Constructor opens the tars and init the data structures.
1605 - Index list must be provided in reverse order (newer first).
1606 - “newer first” apparently means that if there are n backups
1607 provided, the last full backup is at index n-1 and the most recent
1608 diff backup is at index 0.
1609 - Only the first, the second, and the last elements of
1610 ``index_list`` are relevant, others will not be accessed.
1611 - If no ``index_list`` is provided, both ``tarobj`` and
1612 ``backup_path`` must be passed.
1613 - If ``index_list`` is provided, the values of ``tarobj`` and
1614 ``backup_path`` are ignored.
1617 self._directories = []
1618 self._deltatar = deltatar
1620 self._password = deltatar.password
1621 self._crypto_key = deltatar.crypto_key
1622 self._decryptors = []
1623 self._disaster = disaster
# Ownership can only be restored when running as root (and the pwd
# module imported successfully).
1630 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1631 self.canchown = True
1633 self.canchown = False
# Case 1: disaster mode -- backup_index is a raw in-memory index list.
1635 if isinstance (backup_index, list) is True:
1636 decryptor = self._deltatar.decryptor
1638 [{ "curr_vol_no" : None
1642 , "path" : backup_path
1645 , "last_itelement" : None
1647 , "new_volume_handler" :
1648 partial(self.new_volume_handler,
1649 self._deltatar, self._cwd, True,
1650 os.path.dirname(backup_path), decryptor)
1651 , "decryptor" : decryptor
# Case 2: one state dict per index file; the last entry in index_list is
# the full backup, the others are diffs.
1653 elif index_list is not None:
1654 for index in index_list:
1655 is_full = index == index_list[-1]
# Each index gets its own decryptor instance when encryption is in use.
1658 if self._password is not None:
1659 decryptor = crypto.Decrypt (password=self._password,
1660 key=self._crypto_key)
1662 # make paths absolute to avoid cwd problems
1663 if not os.path.isabs(index):
1664 index = os.path.normpath(os.path.join(cwd, index))
1674 last_itelement = None,
1676 new_volume_handler = partial(self.new_volume_handler,
1677 self._deltatar, self._cwd, is_full,
1678 os.path.dirname(index), decryptor),
1679 decryptor = decryptor
1681 self._data.append(s)
# Case 3: direct tar restore -- reuse the caller's tarobj.
1683 # make paths absolute to avoid cwd problems
1684 if not os.path.isabs(backup_path):
1685 backup_path = os.path.normpath(os.path.join(cwd, backup_path))
1687 # update the new_volume_handler of tar_obj
1688 tarobj.new_volume_handler = partial(self.new_volume_handler,
1689 self._deltatar, self._cwd, True, os.path.dirname(backup_path),
1690 self._deltatar.decryptor)
1699 last_itelement = None,
1701 new_volume_handler = tarobj.new_volume_handler,
1702 decryptor = self._deltatar.decryptor
1704 self._data.append(s)
1709 Closes all open files
1711 for data in self._data:
1713 data['vol_fd'].close()
1714 data['vol_fd'] = None
1716 data['tarobj'].close()
1717 data['tarobj'] = None
# Remove *path* (file, symlink or directory tree) while preserving the
# parent directory's mtime; a no-op when the path does not exist.
1719 def delete(self, path):
1723 if not os.path.exists(path):
1726 # to preserve parent directory mtime, we save it
1727 parent_dir = os.path.dirname(path) or os.getcwd()
1728 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
# Directory vs. file/symlink removal branches are elided in this
# sampled listing.
1730 if os.path.isdir(path) and not os.path.islink(path):
1735 # now we restore parent_directory mtime
1736 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
# Restore one index entry, locating the most recent snapshot of the path
# across the (at most two) loaded indexes.
1738 def restore(self, itpath, l_no, callback=None):
1740 Restore the path from the appropriate backup. Receives the current path
1741 from the newest (=first) index iterator. itpath must be not null.
1742 callback is a custom function that gets called for every file.
1744 NB: This function takes the attribute ``_data`` as input but will only
1745 ever use its first and, if available, second element. Anything else in
1746 ``._data[]`` will be ignored.
1748 path = itpath['path']
1750 # Calls the callback function
# delete:// entries were already handled by restore_backup.
1754 if path.startswith('delete://'):
1755 # the file has previously been deleted already in restore_backup in
1756 # all cases so we just need to finish
1759 # get data from newest index (_data[0])
1760 data = self._data[0]
1761 upath = self._deltatar.unprefixed(path)
1763 # to preserve parent directory mtime, we save it
1764 parent_dir = os.path.dirname(upath) or os.getcwd()
1765 if not os.path.exists(parent_dir):
1766 os.makedirs(parent_dir)
1767 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1769 # if path is found in the newest index as to be snapshotted, deal with it
1771 if path.startswith('snapshot://'):
1772 self.restore_file(itpath, data, path, l_no, upath)
1774 # now we restore parent_directory mtime
1775 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1778 # we go from index to index, finding the path in the index, then finding
1779 # the index with the most recent snapshot of the file being restored
1781 # Right now we support diff backups, only. No incremental backups.
1782 # As a result _data[0] is always the diff backup index
1783 # and _data[1] the full backup index.
1784 if len(self._data) == 2:
1785 data = self._data[1]
1786 d, l_no, dpath = self.find_path_in_index(data, upath)
# Not found in the full index either: warn and give up on this path.
1788 self._deltatar.logger.warning('Error restoring file %s from '
1789 'index, not found in index %s' % (path, data['path']))
1792 cur_path = d.get('path', '')
1793 if cur_path.startswith('delete://'):
1794 self._deltatar.logger.warning(('Strange thing happened, file '
1795 '%s was listed in first index but deleted by another '
1796 'one. Path was ignored and untouched.') % path)
1798 elif cur_path.startswith('snapshot://'):
1799 # this code path is reached when the file is unchanged
1800 # in the newest index and therefore of type 'list://'
1801 self.restore_file(d, data, path, l_no, dpath)
1803 # now we restore parent_directory mtime
1804 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1807 # error code path is reached when:
1808 # a) we have more than two indexes (unsupported atm)
1809 # b) both indexes contain a list:// entry (logic error)
1810 # c) we have just one index and it also contains list://
1811 self._deltatar.logger.warning(('Error restoring file %s from index, '
1812 'snapshot not found in any index') % path)
# Scan an index iterator for *upath*, resuming from the cached position
# (last_itelement/last_lno) and restarting the iterator when needed.
# Returns (entry, line_no, unprefixed_path); the not-found return is
# elided from this sampled listing.
1814 def find_path_in_index(self, data, upath):
1815 # NOTE: we restart the iterator sometimes because the iterator can be
1816 # walked over completely multiple times, for example if one path if not
1817 # found in one index and we have to go to the next index.
1818 it = data['iterator']
# No iterator yet (or it was exhausted): open a fresh one over the index.
1820 it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
1821 d, l_no = it.__next__()
# Otherwise resume from the element cached on the previous call.
1823 d = data['last_itelement']
1824 l_no = data['last_lno']
1827 dpath = self._deltatar.unprefixed(d.get('path', ''))
# Exact hit: cache the position and return the entry.
1829 data['last_itelement'] = d
1830 data['last_lno'] = l_no
1831 return d, l_no, dpath
1833 up, dp = self._deltatar.compare_indexes(upath, dpath)
1834 # any time upath should have appeared before current dpath, it means
1835 # upath is just not in this index and we should stop
1837 data['last_itelement'] = d
1838 data['last_lno'] = l_no
# Keep walking forward; exhaustion also means "not in this index".
1842 d, l_no = it.__next__()
1843 except StopIteration:
1844 data['last_itelement'] = d
1845 data['last_lno'] = l_no
# Apply deferred directory metadata (mode, mtime, ownership) collected by
# add_member_dir, deepest paths first so parents are fixed up last.
1848 def restore_directories_permissions(self):
1850 Restore directory permissions when everything have been restored
# Reverse-sort by name: children are processed before their parents.
1857 self._directories.sort(key=operator.attrgetter('name'))
1858 self._directories.reverse()
1860 # Set correct owner, mtime and filemode on directories.
1861 for member in self._directories:
1862 dirpath = member.name
1864 os.chmod(dirpath, member.mode)
1865 os.utime(dirpath, (member.mtime, member.mtime))
1867 # We have to be root to do so.
# Resolve numeric gid/uid from the stored names (fallback branches to the
# stored numeric ids are elided in this sampled listing).
1869 g = grp.getgrnam(member.gname)[2]
1873 u = pwd.getpwnam(member.uname)[2]
# member here is a DirItem; issym is the bool stored by add_member_dir,
# not TarInfo's issym() method.
1877 if member.issym and hasattr(os, "lchown"):
1878 os.lchown(dirpath, u, g)
1880 os.chown(dirpath, u, g)
1881 except EnvironmentError:
1882 raise tarfile.ExtractError("could not change owner")
# Ownership failures are logged, not fatal.
1884 except tarfile.ExtractError as e:
1885 self._deltatar.logger.warning('tarfile: %s' % e)
# Volume-change callback handed to tarfile (a decorator line, presumably
# @staticmethod given the missing self, is elided from this listing):
# derives the next volume's filename and opens it on the tar object.
1888 def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
1890 Handles the new volumes
1892 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1893 volume_number, guess_name=True)
1894 volume_path = os.path.join(backup_path, volume_name)
1896 # we convert relative paths into absolute because CWD is changed
1897 if not os.path.isabs(volume_path):
1898 volume_path = os.path.join(cwd, volume_path)
1899 tarobj.open_volume(volume_path, encryption=encryption)
# Extract a single snapshot entry from its backup volume: positions the
# volume fd at the recorded offset, (re)opens the tarobj there, reads the
# member and extracts it to unprefixed_path.
1901 def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
1903 Restores a snapshot of a file from a specific backup
1905 op_type = file_data.get('type', -1)
1906 member = file_data.get('member', None)
1907 ismember = bool(member)
1909 # when member is set, then we can assume everything is right and we
1910 # just have to restore the path
# Index entries without a valid volume number cannot be located.
1912 vol_no = file_data.get('volume', -1)
1914 if not isinstance(vol_no, int) or vol_no < 0:
1915 self._deltatar.logger.warning('unrecognized type to be restored: '
1916 '%s, line %d' % (op_type, l_no))
1918 # setup the volume that needs to be read. only needed when member is
# Switch volumes when the entry lives in a different one than last time.
1920 if index_data['curr_vol_no'] != vol_no:
1921 index_data['curr_vol_no'] = vol_no
1922 backup_path = os.path.dirname(index_data['path'])
1923 vol_name = self._deltatar.volume_name_func(backup_path,
1924 index_data['is_full'], vol_no, guess_name=True)
1925 vol_path = os.path.join(backup_path, vol_name)
1926 if index_data['vol_fd']:
1927 index_data['vol_fd'].close()
1928 index_data['vol_fd'] = open(vol_path, 'rb')
1930 # force reopen of the tarobj because of new volume
1931 if index_data['tarobj']:
1932 index_data['tarobj'].close()
1933 index_data['tarobj'] = None
1935 # seek tarfile if needed
1936 offset = file_data.get('offset', -1)
1937 if index_data['tarobj']:
# In rescue mode never trust the current position: always reopen.
1938 if self._disaster == tarfile.TOLERANCE_RESCUE:
1939 # force a seek and reopen
1940 index_data['tarobj'].close()
1941 index_data['tarobj'] = None
# Otherwise try the next member in stream order; decryption/compression
# errors fall through to the reopen path below.
1944 member = index_data['tarobj'].__iter__().__next__()
1945 except tarfile.DecryptionError:
1947 except tarfile.CompressionError:
1950 if not member or member.path != file_data['path']:
1951 # force a seek and reopen
1952 index_data['tarobj'].close()
1953 index_data['tarobj'] = None
1956 # open the tarfile if needed
1957 if not index_data['tarobj']:
1958 index_data['vol_fd'].seek(offset)
1959 index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
1960 fileobj=index_data['vol_fd'],
1961 format=tarfile.GNU_FORMAT,
1962 concat='#' in self._deltatar.mode,
1963 encryption=index_data["decryptor"],
1964 new_volume_handler=index_data['new_volume_handler'],
1965 save_to_members=False,
1966 tolerance=self._disaster)
1968 member = index_data['tarobj'].__iter__().__next__()
# Extract under the unprefixed target path.
1970 member.path = unprefixed_path
1971 member.name = unprefixed_path
1973 if op_type == 'directory':
1974 self.add_member_dir(member)
# Work on a copy: extract dirs as 0700 now, real mode is applied later
# by restore_directories_permissions.
1975 member = copy.copy(member)
1976 member.mode = 0o0700
1978 # if it's an existing directory, we then don't need to recreate it
1979 # just set the right permissions, mtime and that kind of stuff
1980 if os.path.exists(member.path):
1984 # set current volume number in tarobj, otherwise the extraction of the
1985 # file might fail when trying to extract a multivolume member
1986 index_data['tarobj'].volume_number = index_data['curr_vol_no']
# Symlinks that cannot be restored are logged and skipped, not fatal.
1988 def ignore_symlink (member, *_args):
1989 self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
1991 # finally, restore the file
1992 index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)
# Queue a directory member so its metadata can be applied at the end of
# the restore (see restore_directories_permissions).
1994 def add_member_dir(self, member):
1996 Add member dir to be restored at the end
# Without chown capability only mode and mtime are recorded; the `else:`
# line between the two appends is elided from this sampled listing.
1998 if not self.canchown:
1999 self._directories.append(DirItem(name=member.name, mode=member.mode,
2000 mtime=member.mtime))
# Running as root: also record ownership, and flatten issym() to a bool.
2002 self._directories.append(DirItem(name=member.name, mode=member.mode,
2003 mtime=member.mtime, gname=member.gname, uname=member.uname,
2004 uid=member.uid, gid=member.gid, issym=member.issym()))
2006 class DirItem(object):
2007 def __init__(self, **kwargs):
2008 for k, v in kwargs.items():