3 # Copyright (C) 2013, 2014 Intra2net AG
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published
7 # by the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see
17 # <http://www.gnu.org/licenses/lgpl-3.0.html>
19 # Author: Eduardo Robles Elvira <edulix@wadobo.com>
# Format version tags. DELTATAR_HEADER_VERSION is used as the default
# ``crypto_version`` (see the class attribute and __init__ below);
# DELTATAR_PARAMETER_VERSION is the default ``crypto_paramversion``
# requested when encrypting.
21 DELTATAR_HEADER_VERSION = 1
22 DELTATAR_PARAMETER_VERSION = 1
35 from functools import partial
# Do-nothing logging handler: registered on the library logger below so
# applications that configure no handler of their own get no
# "No handlers could be found" warnings from this module.
40 class NullHandler(logging.Handler):
41     def emit(self, record):
# Attach the no-op handler to the module's logger by default.
45 logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
53 # encryption direction
54 CRYPTO_MODE_ENCRYPT = 0
55 CRYPTO_MODE_DECRYPT = 1
57 # The canonical extension for encrypted backup files regardless of the actual
58 # encryption parameters is “.pdtcrypt”. This is analogous to the encryption
59 # header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
60 # Since the introduction of the versioned header there is no longer any need
61 # for encoding encryption parameters in the file extensions (“.aes128” and
63 PDTCRYPT_EXTENSION = "pdtcrypt"
# Roles of auxiliary (non-payload) files; used by open_auxiliary_file() to
# select the fixed AES-GCM IV counter for each kind.
67 AUXILIARY_FILE_INDEX = 0
68 AUXILIARY_FILE_INFO = 1
70 class DeltaTar(object):
72 Backup class used to create backups
# NOTE(review): class-level defaults for DeltaTar. Several attribute
# assignments and the full dictionary bodies are elided in this excerpt;
# only the visible entries are documented here.
75 # list of files to exclude in the backup creation or restore operation. It
76 # can contain python regular expressions.
79 # list of files to include in the backup creation or restore operation. It
80 # can contain python regular expressions. If empty, all files in the source
81 # path will be backed up (when creating a backup) or all the files in the
82 # backup will be restored (when restoring a backup), but if included_files
83 # is set then only the files include in the list will be processed.
86 # custom filter of files to be backed up (or restored). Unused and unset
87 # by default. The function receives a file path and must return a boolean.
90 # mode in which the delta will be created (when creating a backup) or
91 # opened (when restoring). Accepts modes analog to the tarfile library.
94 # used together with aes modes to encrypt and decrypt backups.
99 # parameter version to use when encrypting; note that this has no effect
100 # on decryption since the required settings are determined from the headers
101 crypto_version = DELTATAR_HEADER_VERSION
102 crypto_paramversion = None
104 # when encrypting or decrypting, these hold crypto handlers; created before
105 # establishing the Tarfile stream iff a password is supplied.
109 # python logger object.
112 # specifies the index mode in the same format as @param mode, but without
113 # the ':', '|' or '#' at the begining. It doesn't make sense to specify
114 # that the index is encrypted if no password is given in the constructor.
117 # current time for this backup. Used for file names and file creation checks
120 # extra data to included in the header of the index file when creating a
124 # valid tarfile modes and their corresponding default file extension
125 __file_extensions_dict = {
134     '#gz.pdtcrypt': '.gz',
139 # valid index modes and their corresponding default file extension
140 __index_extensions_dict = {
144     'gz.pdtcrypt': '.gz',
148 # valid path prefixes
149 __path_prefix_list = [
# NOTE(review): excluded_files/included_files use mutable list defaults —
# a shared-default hazard if any caller ever mutates them; consider
# ``None`` sentinels (cannot change code in this documentation pass).
155     def __init__(self, excluded_files=[], included_files=[],
156                  filter_func=None, mode="", password=None,
157                  crypto_key=None, nacl=None,
158                  crypto_version=DELTATAR_HEADER_VERSION,
159                  crypto_paramversion=DELTATAR_PARAMETER_VERSION,
160                  logger=None, index_mode=None, index_name_func=None,
161                  volume_name_func=None):
163         Constructor. Configures the diff engine.
166         - excluded_files: list of files to exclude in the backup creation or
167           restore operation. It can contain python regular expressions.
169         - included_files: list of files to include in the backup creation or
170           restore operation. It can contain python regular expressions. If
171           empty, all files in the source path will be backed up (when creating a
172           backup) or all the files in the backup will be restored (when
173           restoring a backup), but if included_files is set then only the files
174           include in the list will be processed.
176         - filter_func: custom filter of files to be backed up (or restored).
177           Unused and unset by default. The function receives a file path and
178           must return a boolean.
180         - mode: mode in which the delta will be created (when creating a backup)
181           or opened (when restoring). Accepts the same modes as the tarfile
182           library. Valid modes are:
185            ':'             open uncompressed
186            ':gz'           open with gzip compression
187            ':bz2'          open with bzip2 compression
188            '|'             open an uncompressed stream of tar blocks
189            '|gz'           open a gzip compressed stream of tar blocks
190            '|bz2'          open a bzip2 compressed stream of tar blocks
191            '#gz'           open a stream of gzip compressed tar blocks
193         - crypto_key: used to encrypt and decrypt backups. Encryption will
194           be enabled automatically if a key is supplied. Requires a salt to be
197         - nacl: salt that was used to derive the encryption key for embedding
198           in the PDTCRYPT header. Not needed when decrypting and when
199           encrypting with password.
201         - password: used to encrypt and decrypt backups. Encryption will be
202           enabled automatically if a password is supplied.
204         - crypto_version: version of the format, determining the kind of PDT
207         - crypto_paramversion: optionally request encryption conforming to
208           a specific parameter version. Defaults to the standard PDT value
209           which as of 2017 is the only one available.
211         - logger: python logger object. Optional.
213         - index_mode: specifies the index mode in the same format as @param
214           mode, but without the ':', '|' or '#' at the begining. If encryption
215           is requested it will extend to the auxiliary (index, info) files as
216           well. This is an optional parameter that will automatically mimic
217           @param mode by default if not provided. Valid modes are:
220            'gz'            open with gzip compression
221            'bz2'           open with bzip2 compression
223         - index_name_func: function that sets a custom name for the index file.
224           This function receives a flag to indicate whether the name will be
225           used for a full or diff backup. The backup path will be prepended to
228         - volume_name_func: function that defines the name of tar volumes. It
229           receives the backup_path, if it's a full backup and the volume number,
230           and must return the name for the corresponding volume name. Optional,
231           DeltaTar has default names for tar volumes.
# Reject file modes not known to the extension table up front.
234         if mode not in self.__file_extensions_dict:
235             raise Exception('Unrecognized extension mode=[%s] requested for files'
238         self.excluded_files = excluded_files
239         self.included_files = included_files
240         self.filter_func = filter_func
241         self.logger = logging.getLogger('deltatar.DeltaTar')
# NOTE(review): the ``logger`` argument is passed to addHandler(), i.e. it
# is treated as a logging *handler*, not a Logger — confirm against callers.
243             self.logger.addHandler(logger)
# Crypto configuration: key+nacl or password enable encryption; version /
# paramversion override the class-level defaults only when given.
246         if crypto_key is not None:
247             self.crypto_key = crypto_key
248             self.nacl = nacl # encryption only
250         if password is not None:
251             self.password = password
253         if crypto_version is not None:
254             self.crypto_version = crypto_version
256         if crypto_paramversion is not None:
257             self.crypto_paramversion = crypto_paramversion
259         # generate index_mode
260         if index_mode is None:
266         elif mode not in self.__index_extensions_dict:
267             raise Exception('Unrecognized extension mode=[%s] requested for index'
270         self.index_mode = index_mode
271         self.current_time = datetime.datetime.now()
# Optional overrides for the default name-generating methods below.
273         if index_name_func is not None:
274             self.index_name_func = index_name_func
276         if volume_name_func is not None:
277             self.volume_name_func = volume_name_func
279     def pick_extension(self, kind, mode=None):
281         Choose the extension depending on a) the kind of file given, b) the
282         processing mode, and c) the current encryption settings.
# Archive files use the file-extension table; the visible else-branch uses
# the index-extension table keyed by the configured index mode.
285         if kind == PDT_TYPE_ARCHIVE:
288             mode = self.__index_extensions_dict [self.index_mode]
# Whenever encryption is active (key or password set) the canonical
# ".pdtcrypt" suffix is appended.
290         if self.crypto_key is not None or self.password is not None:
291             ret += "." + PDTCRYPT_EXTENSION
294     def index_name_func(self, is_full): # pylint: disable=method-hidden
296         Callback for setting a custom name for the index file. Depending on
297         whether *is_full* is set, it will create a suitable name for a full
# Default name: "<bfull|bdiff>-<YYYY-mm-dd-HHMM>.index<ext>", where the
# extension comes from pick_extension() for the configured index mode.
300         prefix = "bfull" if is_full else "bdiff"
301         date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
302         extension = self.pick_extension \
304                      self.__index_extensions_dict [self.index_mode])
306         return "%s-%s.index%s" % (prefix, date_str, extension)
308     def volume_name_func(self, backup_path, # pylint: disable=method-hidden
309                          is_full, volume_number,
312         function that defines the name of tar volumes. It receives the
313         backup_path, if it's a full backup and the volume number, and must return
314         the name for the corresponding volume name. Optional, DeltaTar has default
315         names for tar volumes.
317         If guess_name is activated, the file is intended not to be created but
318         to be found, and thus the date will be guessed.
320         prefix = "bfull" if is_full else "bdiff"
321         extension = self.pick_extension \
323                      self.__file_extensions_dict [self.mode])
# Non-guessing path: stamp with the backup's current time; volume numbers
# are rendered 1-based, zero-padded to three digits.
326             date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
327             return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
# Guessing path: scan backup_path for an existing file matching
# "<prefix>-*-<NNN><ext>" instead of constructing a dated name.
329             prefix = prefix + "-"
330             postfix = "-%03d%s" % (volume_number + 1, extension)
331             for f in os.listdir(backup_path):
332                 if f.startswith(prefix) and f.endswith(postfix):
334             raise Exception("volume not found")
337     def filter_path(self, path, source_path="", is_dir=None):
339         Filters a path, given the source_path, using the filtering properties
340         set in the constructor.
341         The filtering order is:
342         1. included_files (if any)
344         3. filter_func (which must return whether the file is accepted or not)
# Make *path* relative to source_path before matching.
347         if len(source_path) > 0:
348             # ensure that exactly one '/' at end of dir is also removed
349             source_path = source_path.rstrip(os.sep) + os.sep
350             path = path[len(source_path):]
352         # 1. filter included_files
354         if len(self.included_files) > 0:
356             for i in self.included_files:
357                 # it can be either a regexp or a string
358                 if isinstance(i, str):
359                     # if the string matches, then continue
364                     # if the string ends with / it's a directory, and if the
365                     # path is contained in it, it is included
366                     if i.endswith('/') and path.startswith(i):
370                     # if the string doesn't end with /, add it and do the same
372                     elif path.startswith(i + '/'):
376                     # check for PARENT_MATCH
379                         if not dir_path.endswith('/'):
382                         if i.startswith(dir_path):
385                 # if it's a reg exp, then we just check if it matches
# NOTE(review): re._pattern_type is a private attribute that was removed
# in Python 3.7 (use re.Pattern instead) — this line will raise
# AttributeError on modern interpreters; flagged, not changed here.
386                 elif isinstance(i, re._pattern_type):
391                     self.logger.warning('Invalid pattern in included_files: %s' % str(i))
393             if match == NO_MATCH:
396         # when a directory is in PARENT_MATCH, it doesn't matter if it's
397         # excluded. It's subfiles will be excluded, but the directory itself
# 2. filter excluded_files (skipped for PARENT_MATCH directories).
399         if match != PARENT_MATCH:
400             for e in self.excluded_files:
401                 # it can be either a regexp or a string
402                 if isinstance(e, str):
403                     # if the string matches, then exclude
407                     # if the string ends with / it's a directory, and if the
408                     # path starts with the directory, then exclude
409                     if e.endswith('/') and path.startswith(e):
412                     # if the string doesn't end with /, do the same check with
414                     elif path.startswith(e + '/'):
417                 # if it's a reg exp, then we just check if it matches
# NOTE(review): same re._pattern_type portability issue as above.
418                 elif isinstance(e, re._pattern_type):
422                     self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
# 3. finally delegate to the custom filter_func, if any.
425             return self.filter_func(path)
429     def _recursive_walk_dir(self, source_path, keep_base_dir=False):
431         Walk a directory recursively, yielding each file/directory
# Breadth-first traversal using an explicit FIFO queue; children are
# visited in sorted() order so the walk is deterministic.
434         source_path = source_path.rstrip(os.sep)
439             beginning_size = len(source_path) + 1 # +1 for os.sep
441         queue = [source_path]
444             cur_path = queue.pop(0)
446             # it might have been removed in the mean time
447             if not os.path.exists(cur_path):
450             for filename in sorted(os.listdir(cur_path)):
451                 child = os.path.join(cur_path, filename)
452                 is_dir = os.path.isdir(child)
# Apply the include/exclude/filter_func rules before yielding.
453                 status = self.filter_path(child, source_path, is_dir)
454                 if status == NO_MATCH:
456                 if not os.access(child, os.R_OK):
457                     self.logger.warning('Error accessing possibly locked file %s' % child)
# Yield the path relative to source_path (prefix stripped).
461                     yield child[beginning_size:]
# Recurse into matching directories (and PARENT_MATCH ones, whose
# children may still match an include pattern).
463                 if is_dir and (status == MATCH or status == PARENT_MATCH):
466     def _stat_dict(self, path):
468         Returns a dict with the stat data used to compare files
470         stinfo = os.stat(path)
471         mode = stinfo.st_mode
# Classify the entry by its stat mode bits (directory / regular file /
# symlink branches visible below).
474         if stat.S_ISDIR(mode):
476         elif stat.S_ISREG(mode):
478         elif stat.S_ISLNK(mode):
# Timestamps are truncated to whole seconds for comparison purposes.
485             u'mtime': int(stinfo.st_mtime),
486             u'ctime': int(stinfo.st_ctime),
487             u'uid': stinfo.st_uid,
488             u'gid': stinfo.st_gid,
489             u'inode': stinfo.st_ino,
490             u'size': stinfo.st_size
493     def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
495         Return if the dicts are equal in the stat keys
497         keys = [u'type', u'mode',u'size', u'mtime',
498             # not restored: u'inode', u'ctime'
501         # only if user is root, then also check gid/uid. otherwise do not check it,
502         # because tarfile can chown in case of being superuser only
504         # also, skip the check in rpmbuild since the sources end up with the
505         # uid:gid of the packager while the extracted files are 0:0.
506         if hasattr(os, "geteuid") and os.geteuid() == 0 \
507                 and os.getenv ("RPMBUILD_OPTIONS") is None:
# Treat "one side missing" as unequal before comparing contents.
511         if (not d1 and d2 != None) or (d1 != None and not d2):
# Paths are normalized via prefixed() so "list://" vs "snapshot://"
# entries can compare equal when listsnapshot_equal is set.
514         if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
517         type = d1.get('type', '')
520             # size doesn't matter for directories
521             if type == 'directory' and key == 'size':
# Differing defaults (-1 vs -2) guarantee a mismatch when a key is
# absent from both dicts.
523             if d1.get(key, -1) != d2.get(key, -2):
527     def prefixed(self, path, listsnapshot_equal=False):
529         if a path is not prefixed, return it prefixed
# When listsnapshot_equal is set, "list://" entries are rewritten to
# "snapshot://" so that both prefixes compare as equivalent.
531         for prefix in self.__path_prefix_list:
532             if path.startswith(prefix):
533                 if listsnapshot_equal and prefix == u'list://':
534                     return u'snapshot://' + path[len(prefix):]
# Unprefixed paths get the default "snapshot://" prefix.
536         return u'snapshot://' + path
538     def unprefixed(self, path):
540         remove a path prefix if any
542         for prefix in self.__path_prefix_list:
543             if path.startswith(prefix):
544                 return path[len(prefix):]
# Build a crypto handler for the requested direction, or nothing at all
# when neither a key nor a password is configured.
548     def initialize_encryption (self, mode):
549         password = self.password
550         key = self.crypto_key
# No credentials → encryption disabled (falls through, returning None).
553         if key is None and password is None:
555         if mode == CRYPTO_MODE_ENCRYPT:
556             return crypto.Encrypt (password=password,
559                                    version=self.crypto_version,
560                                    paramversion=self.crypto_paramversion)
561         if mode == CRYPTO_MODE_DECRYPT:
562             return crypto.Decrypt (password=password, key=key)
# Any mode other than the two CRYPTO_MODE_* constants is a caller bug.
564         raise Exception ("invalid encryption mode [%r]" % mode)
567     def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
569         Given the specified configuration, opens a file for reading or writing,
570         inheriting the encryption and compression settings from the backup.
571         Returns a file object ready to use.
573         :param mode:    IO mode (read or write, ``"r"`` and ``"w"``,
576         :param kind:    Role of the file, see AUXILIARY_FILE_* constants.
577                         Both the info and the auxiliary file have a globally
578                         unique, constant counter value.
# Compression for the aux file follows the configured index mode.
581         if self.index_mode.startswith('gz'):
583         elif self.index_mode.startswith('bz2'):
# Crypto context direction follows the IO mode (write → encrypt,
# read → decrypt); stays None when encryption is not configured.
591             crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
593             crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
595         if crypto_ctx is not None:
# Each aux-file kind uses its own fixed IV counter so index and info
# files never share an AES-GCM IV space.
596             if kind == AUXILIARY_FILE_INFO:
597                 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
598             elif kind == AUXILIARY_FILE_INDEX:
599                 enccounter = crypto.AES_GCM_IV_CNT_INDEX
601                 raise Exception ("invalid kind of aux file %r" % kind)
# NOTE(review): relies on the project's patched tarfile._Stream accepting
# encryption/enccounter keyword arguments — not stdlib tarfile.
603         sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
604                                bufsize=tarfile.RECORDSIZE, fileobj=None,
605                                encryption=crypto_ctx, enccounter=enccounter)
610     def create_full_backup(self, source_path, backup_path,
611                            max_volume_size=None, extra_data=dict()):
613         Creates a full backup.
616         - source_path: source path to the directory to back up.
617         - backup_path: path where the back up will be stored. Backup path will
618           be created if not existent.
619         - max_volume_size: maximum volume size in megabytes. Used to split the
620           backup in volumes. Optional (won't split in volumes by default).
621         - extra_data: a json-serializable dictionary with information that you
622           want to be included in the header of the index file
# --- input validation -------------------------------------------------
625         if not isinstance(source_path, str):
626             raise Exception('Source path must be a string')
628         if not isinstance(backup_path, str):
629             raise Exception('Backup path must be a string')
631         if not os.path.exists(source_path) or not os.path.isdir(source_path):
632             raise Exception('Source path "%s" does not exist or is not a '\
633                             'directory' % source_path)
635         if max_volume_size != None and (not isinstance(max_volume_size, int) or\
636                                         max_volume_size < 1):
637             raise Exception('max_volume_size must be a positive integer')
638         if max_volume_size != None:
# Convert the megabyte argument to bytes for tarfile.
639             max_volume_size = max_volume_size*1024*1024
641         if not isinstance(extra_data, dict):
642             raise Exception('extra_data must be a dictionary')
# Serialize extra_data early so a non-serializable dict fails fast.
645             extra_data_str = json.dumps(extra_data)
647             raise Exception('extra_data is not json-serializable')
649         if not os.access(source_path, os.R_OK):
650             raise Exception('Source path "%s" is not readable' % source_path)
652         # try to create backup path if needed
653         if not os.path.exists(backup_path):
654             os.makedirs(backup_path)
656         if not os.access(backup_path, os.W_OK):
657             raise Exception('Backup path "%s" is not writeable' % backup_path)
# Strip a single trailing slash so path joins behave predictably.
659         if source_path.endswith('/'):
660             source_path = source_path[:-1]
662         if backup_path.endswith('/'):
663             backup_path = backup_path[:-1]
665         # update current time
666         self.current_time = datetime.datetime.now()
668         if self.mode not in self.__file_extensions_dict:
669             raise Exception('Unrecognized extension')
671         # setup for encrypting payload
672         if self.encryptor is None:
673             self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
675         # some initialization
678         # generate the first volume name
679         vol_name = self.volume_name_func(backup_path, True, 0)
680         tarfile_path = os.path.join(backup_path, vol_name)
# Open the (optionally compressed/encrypted) index sink for writing.
683         index_name = self.index_name_func(True)
684         index_path = os.path.join(backup_path, index_name)
685         index_sink = self.open_auxiliary_file(index_path, 'w')
# Called by tarfile whenever a volume fills up: closes the previous
# volume's file object and opens the next one.
689         def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
691             Handles the new volumes
693             volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
694             volume_path = os.path.join(backup_path, volume_name)
# Track the active volume so each index entry records it.
695             deltarobj.vol_no = volume_number
697             # we convert relative paths into absolute because CWD is changed
698             if not os.path.isabs(volume_path):
699                 volume_path = os.path.join(cwd, volume_path)
701             if tarobj.fileobj is not None:
702                 tarobj.fileobj.close()
704             deltarobj.logger.debug("opening volume %s" % volume_path)
706             tarobj.open_volume(volume_path, encryption=encryption)
708         # wraps some args from context into the handler
709         new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
# Index header, then the BEGIN-FILE-LIST marker; the CRC accumulates
# over every line between BEGIN and END.
711         index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
713         s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
714         # calculate checksum and write into the stream
715         crc = binascii.crc32(s) & 0xFFFFffff
718         # start creating the tarfile
# NOTE(review): concat/encryption/max_volume_size/new_volume_handler are
# extensions provided by the project's patched tarfile, not stdlib.
719         tarobj = tarfile.TarFile.open(tarfile_path,
720                               mode='w' + self.mode,
721                               format=tarfile.GNU_FORMAT,
722                               concat='#' in self.mode,
723                               encryption=self.encryptor,
724                               max_volume_size=max_volume_size,
725                               new_volume_handler=new_volume_handler,
726                               save_to_members=False,
# chdir into the source so archive members get relative paths.
728         os.chdir(source_path)
730         # for each file to be in the backup, do:
731         for path in self._recursive_walk_dir('.'):
732             # calculate stat dict for current file
733             statd = self._stat_dict(path)
734             statd['path'] = u'snapshot://' + statd['path']
735             statd['volume'] = self.vol_no
# Add the member, then record its offset for random-access restore.
738             tarobj.add(path, arcname = statd['path'], recursive=False)
740             # retrieve file offset
741             statd['offset'] = tarobj.get_last_member_offset()
742             self.logger.debug("backup %s" % statd['path'])
744             # store the stat dict in the index
745             s = bytes(json.dumps(statd) + '\n', 'UTF-8')
746             crc = binascii.crc32(s, crc) & 0xffffffff
# Trailer: END marker plus the final file-list checksum record.
749         s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
750         crc = binascii.crc32(s, crc) & 0xffffffff
752         s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
757         index_sink.close (close_fileobj=True)
759     def create_diff_backup(self, source_path, backup_path, previous_index_path,
760                            max_volume_size=None, extra_data=dict()):
765         - source_path: source path to the directory to back up.
766         - backup_path: path where the back up will be stored. Backup path will
767           be created if not existent.
768         - previous_index_path: index of the previous backup, needed to know
769           which files changed since then.
770         - max_volume_size: maximum volume size in megabytes (MB). Used to split
771           the backup in volumes. Optional (won't split in volumes by default).
773         NOTE: previous index is assumed to follow exactly the same format as
774         the index_mode setup in the constructor.
776         # check/sanitize input
777         if not isinstance(source_path, str):
778             raise Exception('Source path must be a string')
780         if not isinstance(backup_path, str):
781             raise Exception('Backup path must be a string')
783         if not os.path.exists(source_path) or not os.path.isdir(source_path):
784             raise Exception('Source path "%s" does not exist or is not a '\
785                             'directory' % source_path)
787         if not isinstance(extra_data, dict):
788             raise Exception('extra_data must be a dictionary')
# Fail fast on non-serializable extra_data.
791             extra_data_str = json.dumps(extra_data)
793             raise Exception('extra_data is not json-serializable')
795         if not os.access(source_path, os.R_OK):
796             raise Exception('Source path "%s" is not readable' % source_path)
798         if max_volume_size != None and (not isinstance(max_volume_size, int) or\
799                                         max_volume_size < 1):
800             raise Exception('max_volume_size must be a positive integer')
801         if max_volume_size != None:
# Megabytes → bytes.
802             max_volume_size = max_volume_size*1024*1024
804         if not isinstance(previous_index_path, str):
805             raise Exception('previous_index_path must be A string')
807         if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
808             raise Exception('Index path "%s" does not exist or is not a '\
809                             'file' % previous_index_path)
811         if not os.access(previous_index_path, os.R_OK):
812             raise Exception('Index path "%s" is not readable' % previous_index_path)
814         # try to create backup path if needed
815         if not os.path.exists(backup_path):
816             os.makedirs(backup_path)
818         if not os.access(backup_path, os.W_OK):
819             raise Exception('Backup path "%s" is not writeable' % backup_path)
821         if source_path.endswith('/'):
822             source_path = source_path[:-1]
824         if backup_path.endswith('/'):
825             backup_path = backup_path[:-1]
827         # update current time
828         self.current_time = datetime.datetime.now()
830         if self.mode not in self.__file_extensions_dict:
831             raise Exception('Unrecognized extension')
833         # setup for encrypting payload
834         if self.encryptor is None:
835             self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
837         # some initialization
840         # generate the first volume name
841         vol_name = self.volume_name_func(backup_path, is_full=False,
843         tarfile_path = os.path.join(backup_path, vol_name)
# Open the diff index sink for writing.
848         index_name = self.index_name_func(is_full=False)
849         index_path = os.path.join(backup_path, index_name)
850         index_sink = self.open_auxiliary_file(index_path, 'w')
# Volume rollover handler (diff flavor — note: unlike the full-backup
# handler it does not pass an encryption context to open_volume).
852         def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
854             Handles the new volumes
856             volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
857                                                      volume_number=volume_number)
858             volume_path = os.path.join(backup_path, volume_name)
859             deltarobj.vol_no = volume_number
861             # we convert relative paths into absolute because CWD is changed
862             if not os.path.isabs(volume_path):
863                 volume_path = os.path.join(cwd, volume_path)
865             deltarobj.logger.debug("opening volume %s" % volume_path)
866             tarobj.open_volume(volume_path)
868         # wraps some args from context into the handler
869         new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
# Index header ("backup-type": "diff") and BEGIN marker; CRC covers all
# lines between BEGIN and END.
871         index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
873         s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
874         # calculate checksum and write into the stream
875         crc = binascii.crc32(s) & 0xFFFFffff
878         # start creating the tarfile
879         tarobj = tarfile.TarFile.open(tarfile_path,
880                               mode='w' + self.mode,
881                               format=tarfile.GNU_FORMAT,
882                               concat='#' in self.mode,
883                               encryption=self.encryptor,
884                               max_volume_size=max_volume_size,
885                               new_volume_handler=new_volume_handler,
886                               save_to_members=False,
890         # create the iterators, first the previous index iterator, then the
891         # source path directory iterator and collate and iterate them
# Absolutize the previous index path before chdir() invalidates CWD-
# relative paths.
892         if not os.path.isabs(previous_index_path):
893             previous_index_path = os.path.join(cwd, previous_index_path)
894         index_it = self.iterate_index_path(previous_index_path)
896         os.chdir(source_path)
897         dir_it = self._recursive_walk_dir('.')
898         dir_path_it = self.jsonize_path_iterator(dir_it)
906         # for each file to be in the backup, do:
907         for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
909             # if file is not in the index, it means it's a new file, so we have
914             # if the file is not in the directory iterator, it means that it has
915             # been deleted, so we need to mark it as such
918             # if the file is in both iterators, it means it might have either
919             # not changed (in which case we will just list it in our index but
920             # it will not be included in the tar file), or it might have
921             # changed, in which case we will snapshot it.
922             elif ipath and dpath:
923                 if self._equal_stat_dicts(ipath, dpath):
927             # TODO: when creating chained backups (i.e. diffing from another
928             # diff), we will need to detect the type of action in the previous
929             # index, because if it was delete and dpath is None, we should
# Dispatch on the action decided above: snapshot / delete / list.
932             if action == 'snapshot':
933                 # calculate stat dict for current file
935                 stat['path'] = "snapshot://" + dpath['path']
936                 stat['volume'] = self.vol_no
938                 self.logger.debug("[STORE] %s" % dpath['path'])
941                 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
943                 # retrieve file offset
944                 stat['offset'] = tarobj.get_last_member_offset()
945             elif action == 'delete':
946                 path = self.unprefixed(ipath['path'])
948                     u'path': u'delete://' + path,
949                     u'type': ipath['type']
951                 self.logger.debug("[DELETE] %s" % path)
953                 # mark it as deleted in the backup
# /dev/null member is the on-tape tombstone for a deleted path.
954                 tarobj.add("/dev/null", arcname=stat['path'])
955             elif action == 'list':
957                 path = self.unprefixed(ipath['path'])
958                 stat['path'] = u'list://' + path
959                 # unchanged files do not enter in the backup, only in the index
960                 self.logger.debug("[UNCHANGED] %s" % path)
963                 self.logger.warning('unknown action in create_diff_backup: {0}'
968             # store the stat dict in the index
969             s = bytes(json.dumps(stat) + '\n', 'UTF-8')
970             crc = binascii.crc32(s, crc) & 0xffffffff
# Trailer: END marker plus checksum record.
973         s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
974         crc = binascii.crc32(s, crc) & 0xffffffff
976         s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
985     def iterate_index_path(self, index_path):
987         Returns an index iterator. Internally, it uses a classic iterator class.
988         We do that instead of just yielding so that the iterator object can have
989         an additional function to close the file descriptor that is opened in
# Iterator over a stored index file, yielding one parsed JSON entry per
# indexed file. Usable as a context manager.
993         class IndexPathIterator(object):
994             def __init__(self, delta_tar, index_path):
995                 self.delta_tar = delta_tar
996                 self.index_path = index_path
998                 self.extra_data = dict()
1008             def __enter__(self):
1010                 Allows this iterator to be used with the "with" statement
# Open via the owning DeltaTar so compression/encryption settings
# are inherited, then validate the header line.
1013                 self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
1014                 # check index header
1015                 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1016                 if j.get("type", '') != 'python-delta-tar-index' or\
1017                         j.get('version', -1) != 1:
1018                     raise Exception("invalid index file format: %s" % json.dumps(j))
1020                 self.extra_data = j.get('extra_data', dict())
1022                 # find BEGIN-FILE-LIST, ignore other headers
1024                     j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1025                     if j.get('type', '') == 'BEGIN-FILE-LIST':
1029             def __exit__(self, type, value, tb):
1031                 Allows this iterator to be used with the "with" statement
1038                 # read each file in the index and process it to do the restore
1042                     j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1043                 except Exception as e:
1048                 op_type = j.get('type', '')
1050                 # when we detect the end of the list, break the loop
1051                 if op_type == 'END-FILE-LIST':
# Entries of unknown type are logged and skipped by recursing to
# the next line.
1057                 if op_type not in ['directory', 'file', 'link']:
1058                     self.delta_tar.logger.warning('unrecognized type to be '
1059                                     'restored: %s, line %d' % (op_type, l_no))
1061                     return self.__next__()
1065         return IndexPathIterator(self, index_path)
1067     def iterate_tar_path(self, tar_path, new_volume_handler=None):
1069         Returns a tar iterator that iterates jsonized member items that contain
1070         an additional "member" field, used by RestoreHelper.
1072         class TarPathIterator(object):
1073             def __init__(self, delta_tar, tar_path, new_volume_handler=None):
1074                 self.delta_tar = delta_tar
1075                 self.tar_path = tar_path
# last_member lets __next__ detect repeated multivolume entries.
1077                 self.last_member = None
1078                 self.new_volume_handler = new_volume_handler
1086                     self.tar_obj.close()
1088             def __enter__(self):
1090                 Allows this iterator to be used with the "with" statement
1092                 if self.tar_obj is None:
# Build a decryptor only when a password is configured.
1094                     if self.delta_tar.password is not None:
1095                         decryptor = crypto.Decrypt \
1096                             (password=self.delta_tar.password,
1097                              key=self.delta_tar.crypto_key)
1098                     self.tar_obj = tarfile.TarFile.open(self.tar_path,
1099                                         mode='r' + self.delta_tar.mode,
1100                                         format=tarfile.GNU_FORMAT,
1101                                         concat='#' in self.delta_tar.mode,
1102                                         encryption=decryptor,
1103                                         new_volume_handler=self.new_volume_handler,
1104                                         save_to_members=False,
1108             def __exit__(self, type, value, tb):
1110                 Allows this iterator to be used with the "with" statement
1113                 self.tar_obj.close()
1118                 Read each member and return it as a stat dict
1120                 tarinfo = self.tar_obj.__iter__().__next__()
1121                 # NOTE: here we compare if tarinfo.path is the same as before
1122                 # instead of comparing the tarinfo object itself because the
1123                 # object itself might change for multivol tarinfos
1124                 if tarinfo is None or (self.last_member is not None and\
1125                     self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
1128                 self.last_member = tarinfo
# Map the tarinfo kind onto the index's type vocabulary.
1131                 if tarinfo.isfile():
1133                 elif tarinfo.isdir():
1135                 elif tarinfo.islnk() or tarinfo.issym():
# ctime/inode cannot be reconstructed from a tar member, hence -1.
1140                     u'path': tarinfo.path,
1141                     u'mode': tarinfo.mode,
1142                     u'mtime': tarinfo.mtime,
1143                     u'ctime': -1, # cannot restore
1144                     u'uid': tarinfo.uid,
1145                     u'gid': tarinfo.gid,
1146                     u'inode': -1, # cannot restore
1147                     u'size': tarinfo.size,
1151         return TarPathIterator(self, tar_path, new_volume_handler)
# NOTE(review): the parameter shadows the builtin ``iter`` — kept as-is
# to preserve the public signature.
1153     def jsonize_path_iterator(self, iter, strip=0):
1155         converts the yielded items of an iterator into json path lines.
1157         strip: Strip the smallest prefix containing num leading slashes from
1162                 path = iter.__next__()
# strip == 0: yield the stat dict untouched; otherwise drop the
# first ``strip`` path components before yielding.
1164                     yield self._stat_dict(path), 0
1166                     st = self._stat_dict(path)
1167                     st['path'] = "/".join(path.split("/")[strip:])
1169             except StopIteration:
1172     def collate_iterators(self, it1, it2):
1174         Collate two iterators, so that it returns pairs of the items of each
1175         iterator (if the items are the same), or (None, elem2) or (elem1, None)
1176         when there's no match for the items in the other iterator.
1178         It assumes that the items in both lists are ordered in the same way.
1181         elem1, elem2 = None, None
# Advance it1 when its current element was consumed; on exhaustion,
# drain the remainder of it2 as (None, elem2) pairs.
1185                     elem1, l_no = it1.__next__()
1186                 except StopIteration:
1188                         yield (None, elem2, l_no)
1190                         if isinstance(elem2, tuple):
1192                         yield (None, elem2, l_no)
# Symmetric case: advance it2; on exhaustion drain it1 as
# (elem1, None) pairs.
1196                     elem2 = it2.__next__()
1197                     if isinstance(elem2, tuple):
1199                 except StopIteration:
1201                         yield (elem1, None, l_no)
1202                     for elem1, l_no in it1:
1203                         yield (elem1, None, l_no)
# Both sides live: order them by path and emit the smaller one (or
# both when the paths compare equal).
1206             index1 = self.unprefixed(elem1['path'])
1207             index2 = self.unprefixed(elem2['path'])
1208             i1, i2 = self.compare_indexes(index1, index2)
1210             yield1 = yield2 = None
1217             yield (yield1, yield2, l_no)
1219 def compare_indexes(self, index1, index2):
1221 Compare iterator indexes and return a tuple in the following form:
1222 if index1 < index2, returns (index1, None)
1223 if index1 == index2 returns (index1, index2)
1224 else: returns (None, index2)
# ordering is component-wise on the '/'-separated path; the length
# difference decides the result when one path is a prefix of the other
# (the conditionals consuming `length` are on elided lines)
1226 l1 = index1.split('/')
1227 l2 = index2.split('/')
1228 length = len(l2) - len(l1)
1231 return (index1, None)
1233 return (None, index2)
# walk the shared components pairwise until the first mismatch
1235 for i1, i2 in zip(l1, l2):
1237 return (index1, None)
1239 return (None, index2)
# every component matched: the indexes are equal
1241 return (index1, index2)
1243 def list_backup(self, backup_tar_path, list_func=None):
# List the members of a backup tar, calling ``list_func`` on every
# member (or logging each path at INFO level when no callback given).
1244 if not isinstance(backup_tar_path, str):
1245 raise Exception('Backup tar path must be a string')
1247 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1248 raise Exception('Source path "%s" does not exist or is not a '\
1249 'file' % backup_tar_path)
1251 if not os.access(backup_tar_path, os.R_OK):
1252 raise Exception('Source path "%s" is not readable' % backup_tar_path)
# local handler invoked by the tar layer whenever a new volume of a
# multivolume archive must be opened
1256 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
1258 Handles the new volumes
1260 volume_name = deltarobj.volume_name_func(backup_path, True,
1261 volume_number, guess_name=True)
1262 volume_path = os.path.join(backup_path, volume_name)
1264 # we convert relative paths into absolute because CWD is changed
1265 if not os.path.isabs(volume_path):
1266 volume_path = os.path.join(cwd, volume_path)
1267 tarobj.open_volume(volume_path, encryption=encryption)
# lazily create the payload decryptor; cached on self for reuse
1269 if self.decryptor is None:
1270 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
1272 backup_path = os.path.dirname(backup_tar_path)
1273 if not os.path.isabs(backup_path):
1274 backup_path = os.path.join(cwd, backup_path)
1275 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)
1277 tarobj = tarfile.TarFile.open(backup_tar_path,
1278 mode='r' + self.mode,
1279 format=tarfile.GNU_FORMAT,
1280 concat='#' in self.mode,
1281 encryption=self.decryptor,
1282 new_volume_handler=new_volume_handler,
1283 save_to_members=False,
# NOTE(review): the inner "filter" shadows the builtin, and its first
# parameter is named ``cls`` although the partial below binds ``self``
# to it — works, but the name is misleading.
1286 def filter(cls, list_func, tarinfo):
1287 if list_func is None:
1288 self.logger.info(tarinfo.path)
1292 filter = partial(filter, self, list_func)
1294 tarobj.extractall(filter=filter)
1297 def restore_backup(self, target_path, backup_indexes_paths=[],
1298 backup_tar_path=None, restore_callback=None,
1299 disaster=tarfile.TOLERANCE_STRICT):
# NOTE(review): mutable default argument ``backup_indexes_paths=[]``
# — benign here because the list is never mutated, but fragile style.
1304 - target_path: path to restore.
1305 - backup_indexes_paths: path to backup indexes, in descending date order.
1306 The indexes indicate the location of their respective backup volumes,
1307 and multiple indexes are needed to be able to restore diff backups.
1308 Note that this is an optional parameter: if not suplied, it will
1309 try to restore directly from backup_tar_path.
1310 - backup_tar_path: path to the backup tar file. Used as an alternative
1311 to backup_indexes_paths to restore directly from a tar file without
1312 using any file index. If it's a multivol tarfile, volume_name_func
1314 - restore_callback: callback function to be called during restore.
1315 This is passed to the helper and gets called for every file.
1317 NOTE: If you want to use an index to restore a backup, this function
1318 only supports to do so when the tarfile mode is either uncompressed or
1319 uses concat compress mode, because otherwise it would be very slow.
1321 NOTE: Indices are assumed to follow the same format as the index_mode
1322 specified in the constructor.
1324 Returns the list of files that could not be restored, if there were
1327 # check/sanitize input
1328 if not isinstance(target_path, str):
1329 raise Exception('Target path must be a string')
# NOTE(review): this guard looks inverted — with the defaults above,
# ``backup_indexes_paths`` is [] (never None) and ``backup_tar_path``
# is None (never []), so the condition can never fire for default
# calls; it probably should read
# ``backup_tar_path is None and backup_indexes_paths == []``.
1331 if backup_indexes_paths is None and backup_tar_path == []:
1332 raise Exception("You have to either provide index paths or a tar path")
# no indexes given: fall back to restoring directly from the tar file
1334 if len(backup_indexes_paths) == 0:
1340 if not isinstance(backup_tar_path, str):
1341 raise Exception('Backup tar path must be a string')
1343 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1344 raise Exception('Source path "%s" does not exist or is not a '\
1345 'file' % backup_tar_path)
1347 if not os.access(backup_tar_path, os.R_OK):
1348 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1350 if not isinstance(backup_indexes_paths, list):
1351 raise Exception('backup_indexes_paths must be a list')
1353 if self.mode.startswith(':') or self.mode.startswith('|'):
1354 raise Exception('Restore only supports either uncompressed tars'
1355 ' or concat compression when restoring from an index, and '
1356 ' the open mode you provided is "%s"' % self.mode)
1358 for index in backup_indexes_paths:
1359 if not isinstance(index, str):
1360 raise Exception('indices must be strings')
1362 if not os.path.exists(index) or not os.path.isfile(index):
1363 raise Exception('Index path "%s" does not exist or is not a '\
1366 if not os.access(index, os.R_OK):
1367 raise Exception('Index path "%s" is not readable' % index)
1369 # try to create backup path if needed
1370 if not os.path.exists(target_path):
1371 os.makedirs(target_path)
1373 # make backup_tar_path absolute so that iterate_tar_path works fine
1374 if backup_tar_path and not os.path.isabs(backup_tar_path):
1375 backup_tar_path = os.path.abspath(backup_tar_path)
# restore runs with CWD set to the target so member paths resolve
1378 os.chdir(target_path)
1380 # setup for decrypting payload
1381 if self.decryptor is None:
1382 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
# strategy selection (branch heads partially elided): direct from the
# tar archive, or driven by one or more index files ("diff" mode)
1385 index_it = self.iterate_tar_path(backup_tar_path)
1386 helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
1387 tarobj=index_it.tar_obj)
1388 elif mode == "diff":
1389 helper = RestoreHelper(self, cwd, backup_indexes_paths,
1392 # get iterator from newest index at _data[0]
1393 index1 = helper._data[0]["path"]
1394 index_it = self.iterate_index_path(index1)
1395 except tarfile.DecryptionError as exn:
1396 self.logger.error("failed to decrypt file [%s]: %s; is this an "
1397 "actual encrypted index file?"
1398 % (index1, str (exn)))
1399 return [(index1, exn)]
1400 except Exception as exn:
1402 self.logger.error("failed to read file [%s]: %s; is this an "
1403 "actual index file?" % (index1, str (exn)))
1404 return [(index1, exn)]
# walk the target directory in parallel with the index
1406 dir_it = self._recursive_walk_dir('.')
1407 dir_path_it = self.jsonize_path_iterator(dir_it)
1409 failed = [] # irrecoverable files
1411 # for each file to be restored, do:
1412 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
1414 upath = dpath['path']
1415 op_type = dpath['type']
1417 upath = self.unprefixed(ipath['path'])
1418 op_type = ipath['type']
# skip paths excluded by the configured include/exclude filters
1421 if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
1424 # if types of the file mismatch, the file needs to be deleted
1426 if ipath is not None and dpath is not None and\
1427 dpath['type'] != ipath['type']:
1428 helper.delete(upath)
1430 # if file not found in dpath, we can directly restore from index
1432 # if the file doesn't exist and it needs to be deleted, it
1433 # means that work is already done
1434 if ipath['path'].startswith('delete://'):
1437 self.logger.debug("restore %s" % ipath['path'])
1438 helper.restore(ipath, l_no, restore_callback)
1439 except Exception as e:
1440 iipath = ipath.get ("path", "")
1441 self.logger.error("FAILED to restore: {} ({})"
# in recover/rescue mode, collect the failure and carry on
1443 if disaster != tarfile.TOLERANCE_STRICT:
1444 failed.append ((iipath, e))
1447 # if both files are equal, we have nothing to restore
1448 if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
1451 # we have to restore the file, but first we need to delete the
1452 # current existing file.
1453 # we don't delete the file if it's a directory, because it might
1454 # just have changed mtime, so it's quite inefficient to remove
1457 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
1458 helper.delete(upath)
1459 self.logger.debug("restore %s" % ipath['path'])
1461 helper.restore(ipath, l_no, restore_callback)
1462 except Exception as e:
1463 if disaster == tarfile.TOLERANCE_STRICT:
1465 failed.append ((ipath.get ("path", ""), e))
1468 # if the file is not in the index (so it comes from the target
1469 # directory) then we have to delete it
1471 self.logger.debug("delete %s" % upath)
1472 helper.delete(upath)
# directory metadata is applied last, deepest-first (see helper)
1474 helper.restore_directories_permissions()
def recover_backup(self, target_path, backup_indexes_paths=[],
                   restore_callback=None):
    """
    Walk the index, extracting objects in disaster mode. Bad files are
    reported along with a reason.

    - target_path: path to restore into.
    - backup_indexes_paths: paths to the backup indexes, newest first
      (same convention as restore_backup).
    - restore_callback: optional callback invoked for every restored
      file. Fix: this was previously accepted but silently dropped;
      it is now forwarded to restore_backup as documented there.

    Returns the list of (path, exception) pairs for files that could
    not be recovered.
    """
    # NOTE: the mutable default ``[]`` is kept for interface
    # compatibility; the list is never mutated here.
    return self.restore_backup(target_path,
                               backup_indexes_paths=backup_indexes_paths,
                               restore_callback=restore_callback,
                               disaster=tarfile.TOLERANCE_RECOVER)
1493 def rescue_backup(self, target_path, backup_tar_path,
1494 restore_callback=None):
1496 More aggressive “unfsck” mode: do not rely on the index data as the
1497 files may be corrupt; skim files for header-like information and
1498 attempt to retrieve the data.
# build a synthetic ("faux") index by scanning the archive for
# recognizable object headers, decrypting with the configured secrets
1500 faux_index = tarfile.gen_rescue_index(backup_tar_path,
1501 password=self.password,
1502 key=self.crypto_key)
# NOTE(review): restore_backup() as defined above does not accept a
# ``backup_index`` keyword — as written this call would raise
# TypeError; confirm against the intended restore_backup signature.
# NOTE(review): ``restore_callback`` is accepted but never forwarded.
1504 return self.restore_backup(target_path,
1505 backup_index=faux_index,
1506 disaster=tarfile.TOLERANCE_RESCUE)
1509 def _parse_json_line(self, f, l_no):
1511 Read line from file like object and process it as JSON.
# decode failures are classified for a better error message: a gzip
# magic prefix means the caller handed us a compressed stream instead
# of a plain-text index
1516 j = json.loads(l.decode('UTF-8'))
1517 except UnicodeDecodeError as e:
1518 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1520 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1521 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1524 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1525 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
# decodable text that still is not valid JSON
1527 except ValueError as e:
1528 raise Exception("error parsing this json line "
1529 "(line number %d): %s" % (l_no, l))
1533 class RestoreHelper(object):
1535 Class used to help to restore files from indices
# class-level defaults; __init__ overrides these per instance
1538 # holds the dicts of data
1545 # list of directories to be restored. This is done as a last step, see
1546 # tarfile.extractall for details.
1549 _disaster = tarfile.TOLERANCE_STRICT
1551 def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
1552 tarobj=None, disaster=tarfile.TOLERANCE_STRICT):
1554 Constructor opens the tars and init the data structures.
1558 - Index list must be provided in reverse order (newer first).
1559 - “newer first” apparently means that if there are n backups
1560 provided, the last full backup is at index n-1 and the most recent
1561 diff backup is at index 0.
1562 - Only the first, the second, and the last elements of
1563 ``index_list`` are relevant, others will not be accessed.
1564 - If no ``index_list`` is provided, both ``tarobj`` and
1565 ``backup_path`` must be passed.
1566 - If ``index_list`` is provided, the values of ``tarobj`` and
1567 ``backup_path`` are ignored.
1570 self._directories = []
1571 self._deltatar = deltatar
1573 self._password = deltatar.password
1574 self._crypto_key = deltatar.crypto_key
1575 self._decryptors = []
1576 self._disaster = disaster
# chown is only possible when running as root (euid 0); remember the
# capability so ownership restore can be skipped otherwise
1583 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1584 self.canchown = True
1586 self.canchown = False
1588 if index_list is not None:
1589 for index in index_list:
# by convention the last entry of index_list is the full backup;
# everything before it is a diff
1590 is_full = index == index_list[-1]
1593 if self._password is not None:
1594 decryptor = crypto.Decrypt (password=self._password,
1595 key=self._crypto_key)
1597 # make paths absolute to avoid cwd problems
1598 if not os.path.isabs(index):
1599 index = os.path.normpath(os.path.join(cwd, index))
# per-index state dict (construction partially elided): iterator
# resume point, volume bookkeeping, handler and decryptor
1609 last_itelement = None,
1611 new_volume_handler = partial(self.new_volume_handler,
1612 self._deltatar, self._cwd, is_full,
1613 os.path.dirname(index), decryptor),
1614 decryptor = decryptor
1616 self._data.append(s)
1618 # make paths absolute to avoid cwd problems
1619 if not os.path.isabs(backup_path):
1620 backup_path = os.path.normpath(os.path.join(cwd, backup_path))
1622 # update the new_volume_handler of tar_obj
1623 tarobj.new_volume_handler = partial(self.new_volume_handler,
1624 self._deltatar, self._cwd, True, os.path.dirname(backup_path),
1625 self._deltatar.decryptor)
1634 last_itelement = None,
1636 new_volume_handler = tarobj.new_volume_handler,
1637 decryptor = self._deltatar.decryptor
1639 self._data.append(s)
# close() (def line elided above): release every per-index volume file
# descriptor and tar object, nulling the slots so a second close is safe
1644 Closes all open files
1646 for data in self._data:
1648 data['vol_fd'].close()
1649 data['vol_fd'] = None
1651 data['tarobj'].close()
1652 data['tarobj'] = None
1654 def delete(self, path):
# Remove ``path`` (file, link or directory tree) while preserving the
# parent directory's mtime; a missing path is a no-op.
1658 if not os.path.exists(path):
1661 # to preserve parent directory mtime, we save it
1662 parent_dir = os.path.dirname(path) or os.getcwd()
1663 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
# directories (but not symlinks to them) need recursive removal;
# the removal calls themselves are on elided lines
1665 if os.path.isdir(path) and not os.path.islink(path):
1670 # now we restore parent_directory mtime
1671 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1673 def restore(self, itpath, l_no, callback=None):
1675 Restore the path from the appropriate backup. Receives the current path
1676 from the newest (=first) index iterator. itpath must be not null.
1677 callback is a custom function that gets called for every file.
1679 NB: This function takes the attribute ``_data`` as input but will only
1680 ever use its first and, if available, second element. Anything else in
1681 ``._data[]`` will be ignored.
1683 path = itpath['path']
1685 # Calls the callback function
1689 if path.startswith('delete://'):
1690 # the file has previously been deleted already in restore_backup in
1691 # all cases so we just need to finish
1694 # get data from newest index (_data[0])
1695 data = self._data[0]
1696 upath = self._deltatar.unprefixed(path)
1698 # to preserve parent directory mtime, we save it
1699 parent_dir = os.path.dirname(upath) or os.getcwd()
1700 if not os.path.exists(parent_dir):
1701 os.makedirs(parent_dir)
1702 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1704 # if path is found in the newest index as to be snapshotted, deal with it
1706 if path.startswith('snapshot://'):
1708 self.restore_file(itpath, data, path, l_no, upath)
1712 # now we restore parent_directory mtime
1713 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1716 # we go from index to index, finding the path in the index, then finding
1717 # the index with the most recent snapshot of the file being restored
1719 # Right now we support diff backups, only. No incremental backups.
1720 # As a result _data[0] is always the diff backup index
1721 # and _data[1] the full backup index.
1722 if len(self._data) == 2:
1723 data = self._data[1]
1724 d, l_no, dpath = self.find_path_in_index(data, upath)
1726 self._deltatar.logger.warning('Error restoring file %s from '
1727 'index, not found in index %s' % (path, data['path']))
# the full-backup index entry found for this path decides what to do
1730 cur_path = d.get('path', '')
1731 if cur_path.startswith('delete://'):
1732 self._deltatar.logger.warning(('Strange thing happened, file '
1733 '%s was listed in first index but deleted by another '
1734 'one. Path was ignored and untouched.') % path)
1736 elif cur_path.startswith('snapshot://'):
1737 # this code path is reached when the file is unchanged
1738 # in the newest index and therefore of type 'list://'
1739 self.restore_file(d, data, path, l_no, dpath)
1741 # now we restore parent_directory mtime
1742 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1745 # error code path is reached when:
1746 # a) we have more than two indexes (unsupported atm)
1747 # b) both indexes contain a list:// entry (logic error)
1748 # c) we have just one index and it also contains list://
1749 self._deltatar.logger.warning(('Error restoring file %s from index, '
1750 'snapshot not found in any index') % path)
1752 def find_path_in_index(self, data, upath):
1753 # NOTE: we restart the iterator sometimes because the iterator can be
1754 # walked over completely multiple times, for example if one path if not
1755 # found in one index and we have to go to the next index.
1756 it = data['iterator']
# a fresh iterator is created when none exists yet; otherwise resume
# from the element remembered by the previous call
1758 it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
1759 d, l_no = it.__next__()
1761 d = data['last_itelement']
1762 l_no = data['last_lno']
# scan forward until the entry matches or is past upath
1765 dpath = self._deltatar.unprefixed(d.get('path', ''))
# exact match: remember the position and hand the entry back
1767 data['last_itelement'] = d
1768 data['last_lno'] = l_no
1769 return d, l_no, dpath
1771 up, dp = self._deltatar.compare_indexes(upath, dpath)
1772 # any time upath should have appeared before current dpath, it means
1773 # upath is just not in this index and we should stop
1775 data['last_itelement'] = d
1776 data['last_lno'] = l_no
1780 d, l_no = it.__next__()
1781 except StopIteration:
# index exhausted without a match; remember where we stopped
1782 data['last_itelement'] = d
1783 data['last_lno'] = l_no
1786 def restore_directories_permissions(self):
1788 Restore directory permissions when everything have been restored
# reverse-sorted by name processes the deepest directories first, so a
# parent's mtime is only fixed after its children stop being touched
1795 self._directories.sort(key=operator.attrgetter('name'))
1796 self._directories.reverse()
1798 # Set correct owner, mtime and filemode on directories.
1799 for member in self._directories:
1800 dirpath = member.name
1802 os.chmod(dirpath, member.mode)
1803 os.utime(dirpath, (member.mtime, member.mtime))
1805 # We have to be root to do so.
1807 g = grp.getgrnam(member.gname)[2]
1811 u = pwd.getpwnam(member.uname)[2]
# ``member.issym`` is the boolean stored by add_member_dir (result of
# TarInfo.issym()), not the unbound method
1815 if member.issym and hasattr(os, "lchown"):
1816 os.lchown(dirpath, u, g)
1818 os.chown(dirpath, u, g)
1819 except EnvironmentError:
1820 raise tarfile.ExtractError("could not change owner")
1822 except tarfile.ExtractError as e:
1823 self._deltatar.logger.warning('tarfile: %s' % e)
# Resolve the name of the next volume and open it on the tar object.
# Presumably decorated @staticmethod on the elided line above (no
# ``self`` parameter; bound via functools.partial) — TODO confirm.
1826 def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
1828 Handles the new volumes
1830 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1831 volume_number, guess_name=True)
1832 volume_path = os.path.join(backup_path, volume_name)
1834 # we convert relative paths into absolute because CWD is changed
1835 if not os.path.isabs(volume_path):
1836 volume_path = os.path.join(cwd, volume_path)
1837 tarobj.open_volume(volume_path, encryption=encryption)
1839 def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
1841 Restores a snapshot of a file from a specific backup
1843 op_type = file_data.get('type', -1)
1844 member = file_data.get('member', None)
1845 ismember = bool(member)
1847 # when member is set, then we can assume everything is right and we
1848 # just have to restore the path
1850 vol_no = file_data.get('volume', -1)
# defensive: an index entry without a sane volume number is only
# warned about, not fatal
1852 if not isinstance(vol_no, int) or vol_no < 0:
1853 self._deltatar.logger.warning('unrecognized type to be restored: '
1854 '%s, line %d' % (op_type, l_no))
1856 # setup the volume that needs to be read. only needed when member is
1858 if index_data['curr_vol_no'] != vol_no:
1859 index_data['curr_vol_no'] = vol_no
1860 backup_path = os.path.dirname(index_data['path'])
1861 vol_name = self._deltatar.volume_name_func(backup_path,
1862 index_data['is_full'], vol_no, guess_name=True)
1863 vol_path = os.path.join(backup_path, vol_name)
1864 if index_data['vol_fd']:
1865 index_data['vol_fd'].close()
1866 index_data['vol_fd'] = open(vol_path, 'rb')
1868 # force reopen of the tarobj because of new volume
1869 if index_data['tarobj']:
1870 index_data['tarobj'].close()
1871 index_data['tarobj'] = None
1873 # seek tarfile if needed
1874 offset = file_data.get('offset', -1)
1875 if index_data['tarobj']:
# try to continue with the already-open tar object; decryption or
# compression errors just force a reopen at the right offset below
1877 member = index_data['tarobj'].__iter__().__next__()
1878 except tarfile.DecryptionError:
1880 except tarfile.CompressionError:
1883 if not member or member.path != file_data['path']:
1884 # force a seek and reopen
1885 index_data['tarobj'].close()
1886 index_data['tarobj'] = None
1888 # open the tarfile if needed
1889 if not index_data['tarobj']:
1890 index_data['vol_fd'].seek(offset)
1891 index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
1892 fileobj=index_data['vol_fd'],
1893 format=tarfile.GNU_FORMAT,
1894 concat='#' in self._deltatar.mode,
1895 encryption=index_data["decryptor"],
1896 new_volume_handler=index_data['new_volume_handler'],
1897 save_to_members=False,
1898 tolerance=self._disaster)
1900 member = index_data['tarobj'].__iter__().__next__()
# restore under the unprefixed name (snapshot:///list:// stripped)
1902 member.path = unprefixed_path
1903 member.name = unprefixed_path
1905 if op_type == 'directory':
1906 self.add_member_dir(member)
1907 member = copy.copy(member)
# directories are first created with owner-only permissions; the real
# mode/mtime is applied later by restore_directories_permissions()
1908 member.mode = 0o0700
1910 # if it's an existing directory, we then don't need to recreate it
1911 # just set the right permissions, mtime and that kind of stuff
1912 if os.path.exists(member.path):
1916 # set current volume number in tarobj, otherwise the extraction of the
1917 # file might fail when trying to extract a multivolume member
1918 index_data['tarobj'].volume_number = index_data['curr_vol_no']
# symlinks are skipped with a warning instead of being created
1920 def ignore_symlink (member, *_args):
1921 self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
1923 # finally, restore the file
1924 index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)
1926 def add_member_dir(self, member):
1928 Add member dir to be restored at the end
# without chown capability only mode and mtime are recorded; when
# running as root the ownership information is preserved as well
1930 if not self.canchown:
1931 self._directories.append(DirItem(name=member.name, mode=member.mode,
1932 mtime=member.mtime))
1934 self._directories.append(DirItem(name=member.name, mode=member.mode,
1935 mtime=member.mtime, gname=member.gname, uname=member.uname,
1936 uid=member.uid, gid=member.gid, issym=member.issym()))
1938 class DirItem(object):
1939 def __init__(self, **kwargs):
1940 for k, v in kwargs.items():