3 # Copyright (C) 2013, 2014 Intra2net AG
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published
7 # by the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see
17 # <http://www.gnu.org/licenses/lgpl-3.0.html>
19 # Author: Eduardo Robles Elvira <edulix@wadobo.com>
# Default format versions for encryption. Used as the constructor defaults
# for DeltaTar's crypto_version / crypto_paramversion parameters; the header
# version selects the kind of PDT object header written, the parameter
# version the encryption parameter set requested from the crypto layer.
DELTATAR_HEADER_VERSION = 1
DELTATAR_PARAMETER_VERSION = 1
35 from functools import partial
class NullHandler(logging.Handler):
    """
    Logging handler that silently discards every record.

    Attached below to the "deltatar.DeltaTar" logger so that library users
    who configure no logging of their own do not trigger the "no handlers
    could be found" warning. Functionally equivalent to the stdlib
    ``logging.NullHandler``; kept as a local class so the name remains part
    of this module's interface.
    """
    def emit(self, record):
        # Intentionally a no-op: records sent to this handler vanish.
        pass

# Install the do-nothing handler as the library default.
logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
# encryption direction, passed to initialize_encryption()
CRYPTO_MODE_ENCRYPT = 0
CRYPTO_MODE_DECRYPT = 1

# The canonical extension for encrypted backup files regardless of the actual
# encryption parameters is “.pdtcrypt”. This is analogous to the encryption
# header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
# Since the introduction of the versioned header there is no longer any need
# for encoding encryption parameters in the file extensions (“.aes128” and
# the like).
PDTCRYPT_EXTENSION = "pdtcrypt"

# Roles of auxiliary (non-archive) files; open_auxiliary_file() uses these to
# select the fixed AES-GCM IV counter for the index vs. the info file.
AUXILIARY_FILE_INDEX = 0
AUXILIARY_FILE_INFO = 1
70 class DeltaTar(object):
72 Backup class used to create backups
75 # list of files to exclude in the backup creation or restore operation. It
76 # can contain python regular expressions.
79 # list of files to include in the backup creation or restore operation. It
80 # can contain python regular expressions. If empty, all files in the source
81 # path will be backed up (when creating a backup) or all the files in the
82 # backup will be restored (when restoring a backup), but if included_files
83 # is set then only the files include in the list will be processed.
86 # custom filter of files to be backed up (or restored). Unused and unset
87 # by default. The function receives a file path and must return a boolean.
90 # mode in which the delta will be created (when creating a backup) or
91 # opened (when restoring). Accepts modes analog to the tarfile library.
94 # used together with aes modes to encrypt and decrypt backups.
99 # parameter version to use when encrypting; note that this has no effect
100 # on decryption since the required settings are determined from the headers
101 crypto_version = DELTATAR_HEADER_VERSION
102 crypto_paramversion = None
104 # when encrypting or decrypting, these hold crypto handlers; created before
105 # establishing the Tarfile stream iff a password is supplied.
109 # python logger object.
112 # specifies the index mode in the same format as @param mode, but without
113 # the ':', '|' or '#' at the begining. It doesn't make sense to specify
114 # that the index is encrypted if no password is given in the constructor.
117 # current time for this backup. Used for file names and file creation checks
120 # extra data to included in the header of the index file when creating a
124 # valid tarfile modes and their corresponding default file extension
125 __file_extensions_dict = {
134 '#gz.pdtcrypt': '.gz',
139 # valid index modes and their corresponding default file extension
140 __index_extensions_dict = {
144 'gz.pdtcrypt': '.gz',
148 # valid path prefixes
149 __path_prefix_list = [
155 def __init__(self, excluded_files=[], included_files=[],
156 filter_func=None, mode="", password=None,
157 crypto_key=None, nacl=None,
158 crypto_version=DELTATAR_HEADER_VERSION,
159 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
160 logger=None, index_mode=None, index_name_func=None,
161 volume_name_func=None):
163 Constructor. Configures the diff engine.
166 - excluded_files: list of files to exclude in the backup creation or
167 restore operation. It can contain python regular expressions.
169 - included_files: list of files to include in the backup creation or
170 restore operation. It can contain python regular expressions. If
171 empty, all files in the source path will be backed up (when creating a
172 backup) or all the files in the backup will be restored (when
173 restoring a backup), but if included_files is set then only the files
174 include in the list will be processed.
176 - filter_func: custom filter of files to be backed up (or restored).
177 Unused and unset by default. The function receives a file path and
178 must return a boolean.
180 - mode: mode in which the delta will be created (when creating a backup)
181 or opened (when restoring). Accepts the same modes as the tarfile
182 library. Valid modes are:
185 ':' open uncompressed
186 ':gz' open with gzip compression
187 ':bz2' open with bzip2 compression
188 '|' open an uncompressed stream of tar blocks
189 '|gz' open a gzip compressed stream of tar blocks
190 '|bz2' open a bzip2 compressed stream of tar blocks
191 '#gz' open a stream of gzip compressed tar blocks
193 - crypto_key: used to encrypt and decrypt backups. Encryption will
194 be enabled automatically if a key is supplied. Requires a salt to be
197 - nacl: salt that was used to derive the encryption key for embedding
198 in the PDTCRYPT header. Not needed when decrypting and when
199 encrypting with password.
201 - password: used to encrypt and decrypt backups. Encryption will be
202 enabled automatically if a password is supplied.
204 - crypto_version: version of the format, determining the kind of PDT
207 - crypto_paramversion: optionally request encryption conforming to
208 a specific parameter version. Defaults to the standard PDT value
209 which as of 2017 is the only one available.
211 - logger: python logger object. Optional.
213 - index_mode: specifies the index mode in the same format as @param
214 mode, but without the ':', '|' or '#' at the begining. If encryption
215 is requested it will extend to the auxiliary (index, info) files as
216 well. This is an optional parameter that will automatically mimic
217 @param mode by default if not provided. Valid modes are:
220 'gz' open with gzip compression
221 'bz2' open with bzip2 compression
223 - index_name_func: function that sets a custom name for the index file.
224 This function receives a flag to indicate whether the name will be
225 used for a full or diff backup. The backup path will be prepended to
228 - volume_name_func: function that defines the name of tar volumes. It
229 receives the backup_path, if it's a full backup and the volume number,
230 and must return the name for the corresponding volume name. Optional,
231 DeltaTar has default names for tar volumes.
234 if mode not in self.__file_extensions_dict:
235 raise Exception('Unrecognized extension mode=[%s] requested for files'
238 self.excluded_files = excluded_files
239 self.included_files = included_files
240 self.filter_func = filter_func
241 self.logger = logging.getLogger('deltatar.DeltaTar')
243 self.logger.addHandler(logger)
246 if crypto_key is not None:
247 self.crypto_key = crypto_key
248 self.nacl = nacl # encryption only
250 if password is not None:
251 self.password = password
253 if crypto_version is not None:
254 self.crypto_version = crypto_version
256 if crypto_paramversion is not None:
257 self.crypto_paramversion = crypto_paramversion
259 # generate index_mode
260 if index_mode is None:
266 elif mode not in self.__index_extensions_dict:
267 raise Exception('Unrecognized extension mode=[%s] requested for index'
270 self.index_mode = index_mode
271 self.current_time = datetime.datetime.now()
273 if index_name_func is not None:
274 self.index_name_func = index_name_func
276 if volume_name_func is not None:
277 self.volume_name_func = volume_name_func
279 def pick_extension(self, kind, mode=None):
281 Choose the extension depending on a) the kind of file given, b) the
282 processing mode, and c) the current encryption settings.
285 if kind == PDT_TYPE_ARCHIVE:
288 mode = self.__index_extensions_dict [self.index_mode]
290 if self.crypto_key is not None or self.password is not None:
291 ret += "." + PDTCRYPT_EXTENSION
294 def index_name_func(self, is_full): # pylint: disable=method-hidden
296 Callback for setting a custom name for the index file. Depending on
297 whether *is_full* is set, it will create a suitable name for a full
300 prefix = "bfull" if is_full else "bdiff"
301 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
302 extension = self.pick_extension \
304 self.__index_extensions_dict [self.index_mode])
306 return "%s-%s.index%s" % (prefix, date_str, extension)
308 def volume_name_func(self, backup_path, # pylint: disable=method-hidden
309 is_full, volume_number,
312 function that defines the name of tar volumes. It receives the
313 backup_path, if it's a full backup and the volume number, and must return
314 the name for the corresponding volume name. Optional, DeltaTar has default
315 names for tar volumes.
317 If guess_name is activated, the file is intended not to be created but
318 to be found, and thus the date will be guessed.
320 prefix = "bfull" if is_full else "bdiff"
321 extension = self.pick_extension \
323 self.__file_extensions_dict [self.mode])
326 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
327 return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
329 prefix = prefix + "-"
330 postfix = "-%03d%s" % (volume_number + 1, extension)
331 for f in os.listdir(backup_path):
332 if f.startswith(prefix) and f.endswith(postfix):
334 raise Exception("volume not found")
337 def filter_path(self, path, source_path="", is_dir=None):
339 Filters a path, given the source_path, using the filtering properties
340 set in the constructor.
341 The filtering order is:
342 1. included_files (if any)
344 3. filter_func (which must return whether the file is accepted or not)
347 if len(source_path) > 0:
348 # ensure that exactly one '/' at end of dir is also removed
349 source_path = source_path.rstrip(os.sep) + os.sep
350 path = path[len(source_path):]
352 # 1. filter included_files
354 if len(self.included_files) > 0:
356 for i in self.included_files:
357 # it can be either a regexp or a string
358 if isinstance(i, str):
359 # if the string matches, then continue
364 # if the string ends with / it's a directory, and if the
365 # path is contained in it, it is included
366 if i.endswith('/') and path.startswith(i):
370 # if the string doesn't end with /, add it and do the same
372 elif path.startswith(i + '/'):
376 # check for PARENT_MATCH
379 if not dir_path.endswith('/'):
382 if i.startswith(dir_path):
385 # if it's a reg exp, then we just check if it matches
386 elif isinstance(i, re._pattern_type):
391 self.logger.warning('Invalid pattern in included_files: %s' % str(i))
393 if match == NO_MATCH:
396 # when a directory is in PARENT_MATCH, it doesn't matter if it's
397 # excluded. It's subfiles will be excluded, but the directory itself
399 if match != PARENT_MATCH:
400 for e in self.excluded_files:
401 # it can be either a regexp or a string
402 if isinstance(e, str):
403 # if the string matches, then exclude
407 # if the string ends with / it's a directory, and if the
408 # path starts with the directory, then exclude
409 if e.endswith('/') and path.startswith(e):
412 # if the string doesn't end with /, do the same check with
414 elif path.startswith(e + '/'):
417 # if it's a reg exp, then we just check if it matches
418 elif isinstance(e, re._pattern_type):
422 self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
425 return self.filter_func(path)
429 def _recursive_walk_dir(self, source_path, keep_base_dir=False):
431 Walk a directory recursively, yielding each file/directory
434 source_path = source_path.rstrip(os.sep)
439 beginning_size = len(source_path) + 1 # +1 for os.sep
441 queue = [source_path]
444 cur_path = queue.pop(0)
446 # it might have been removed in the mean time
447 if not os.path.exists(cur_path):
450 for filename in sorted(os.listdir(cur_path)):
451 child = os.path.join(cur_path, filename)
452 is_dir = os.path.isdir(child)
453 status = self.filter_path(child, source_path, is_dir)
454 if status == NO_MATCH:
456 if not os.access(child, os.R_OK):
457 self.logger.warning('Error accessing possibly locked file %s' % child)
461 yield child[beginning_size:]
463 if is_dir and (status == MATCH or status == PARENT_MATCH):
466 def _stat_dict(self, path):
468 Returns a dict with the stat data used to compare files
470 stinfo = os.stat(path)
471 mode = stinfo.st_mode
474 if stat.S_ISDIR(mode):
476 elif stat.S_ISREG(mode):
478 elif stat.S_ISLNK(mode):
485 u'mtime': int(stinfo.st_mtime),
486 u'ctime': int(stinfo.st_ctime),
487 u'uid': stinfo.st_uid,
488 u'gid': stinfo.st_gid,
489 u'inode': stinfo.st_ino,
490 u'size': stinfo.st_size
493 def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
495 Return if the dicts are equal in the stat keys
497 keys = [u'type', u'mode',u'size', u'mtime',
498 # not restored: u'inode', u'ctime'
501 # only if user is root, then also check gid/uid. otherwise do not check it,
502 # because tarfile can chown in case of being superuser only
504 # also, skip the check in rpmbuild since the sources end up with the
505 # uid:gid of the packager while the extracted files are 0:0.
506 if hasattr(os, "geteuid") and os.geteuid() == 0 \
507 and os.getenv ("RPMBUILD_OPTIONS") is None:
511 if (not d1 and d2 != None) or (d1 != None and not d2):
514 if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
517 type = d1.get('type', '')
520 # size doesn't matter for directories
521 if type == 'directory' and key == 'size':
523 if d1.get(key, -1) != d2.get(key, -2):
527 def prefixed(self, path, listsnapshot_equal=False):
529 if a path is not prefixed, return it prefixed
531 for prefix in self.__path_prefix_list:
532 if path.startswith(prefix):
533 if listsnapshot_equal and prefix == u'list://':
534 return u'snapshot://' + path[len(prefix):]
536 return u'snapshot://' + path
538 def unprefixed(self, path):
540 remove a path prefix if any
542 for prefix in self.__path_prefix_list:
543 if path.startswith(prefix):
544 return path[len(prefix):]
548 def initialize_encryption (self, mode):
549 password = self.password
550 key = self.crypto_key
553 if key is None and password is None:
555 if mode == CRYPTO_MODE_ENCRYPT:
556 return crypto.Encrypt (password=password,
559 version=self.crypto_version,
560 paramversion=self.crypto_paramversion)
561 if mode == CRYPTO_MODE_DECRYPT:
562 return crypto.Decrypt (password=password, key=key)
564 raise Exception ("invalid encryption mode [%r]" % mode)
567 def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
569 Given the specified configuration, opens a file for reading or writing,
570 inheriting the encryption and compression settings from the backup.
571 Returns a file object ready to use.
573 :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
576 :param kind: Role of the file, see AUXILIARY_FILE_* constants.
577 Both the info and the auxiliary file have a globally
578 unique, constant counter value.
581 if self.index_mode.startswith('gz'):
583 elif self.index_mode.startswith('bz2'):
591 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
593 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
595 if crypto_ctx is not None:
596 if kind == AUXILIARY_FILE_INFO:
597 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
598 elif kind == AUXILIARY_FILE_INDEX:
599 enccounter = crypto.AES_GCM_IV_CNT_INDEX
601 raise Exception ("invalid kind of aux file %r" % kind)
603 sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
604 bufsize=tarfile.RECORDSIZE, fileobj=None,
605 encryption=crypto_ctx, enccounter=enccounter)
610 def create_full_backup(self, source_path, backup_path,
611 max_volume_size=None, extra_data=dict()):
613 Creates a full backup.
616 - source_path: source path to the directory to back up.
617 - backup_path: path where the back up will be stored. Backup path will
618 be created if not existent.
619 - max_volume_size: maximum volume size in megabytes. Used to split the
620 backup in volumes. Optional (won't split in volumes by default).
621 - extra_data: a json-serializable dictionary with information that you
622 want to be included in the header of the index file
625 if not isinstance(source_path, str):
626 raise Exception('Source path must be a string')
628 if not isinstance(backup_path, str):
629 raise Exception('Backup path must be a string')
631 if not os.path.exists(source_path) or not os.path.isdir(source_path):
632 raise Exception('Source path "%s" does not exist or is not a '\
633 'directory' % source_path)
635 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
636 max_volume_size < 1):
637 raise Exception('max_volume_size must be a positive integer')
638 if max_volume_size != None:
639 max_volume_size = max_volume_size*1024*1024
641 if not isinstance(extra_data, dict):
642 raise Exception('extra_data must be a dictionary')
645 extra_data_str = json.dumps(extra_data)
647 raise Exception('extra_data is not json-serializable')
649 if not os.access(source_path, os.R_OK):
650 raise Exception('Source path "%s" is not readable' % source_path)
652 # try to create backup path if needed
653 if not os.path.exists(backup_path):
654 os.makedirs(backup_path)
656 if not os.access(backup_path, os.W_OK):
657 raise Exception('Backup path "%s" is not writeable' % backup_path)
659 if source_path.endswith('/'):
660 source_path = source_path[:-1]
662 if backup_path.endswith('/'):
663 backup_path = backup_path[:-1]
665 # update current time
666 self.current_time = datetime.datetime.now()
668 if self.mode not in self.__file_extensions_dict:
669 raise Exception('Unrecognized extension')
671 # setup for encrypting payload
672 if self.encryptor is None:
673 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
675 # some initialization
678 # generate the first volume name
679 vol_name = self.volume_name_func(backup_path, True, 0)
680 tarfile_path = os.path.join(backup_path, vol_name)
683 index_name = self.index_name_func(True)
684 index_path = os.path.join(backup_path, index_name)
685 index_sink = self.open_auxiliary_file(index_path, 'w')
689 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
691 Handles the new volumes
693 volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
694 volume_path = os.path.join(backup_path, volume_name)
695 deltarobj.vol_no = volume_number
697 # we convert relative paths into absolute because CWD is changed
698 if not os.path.isabs(volume_path):
699 volume_path = os.path.join(cwd, volume_path)
701 if tarobj.fileobj is not None:
702 tarobj.fileobj.close()
704 deltarobj.logger.debug("opening volume %s" % volume_path)
706 tarobj.open_volume(volume_path, encryption=encryption)
708 # wraps some args from context into the handler
709 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
711 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
713 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
714 # calculate checksum and write into the stream
715 crc = binascii.crc32(s) & 0xFFFFffff
718 # start creating the tarfile
719 tarobj = tarfile.TarFile.open(tarfile_path,
720 mode='w' + self.mode,
721 format=tarfile.GNU_FORMAT,
722 concat='#' in self.mode,
723 encryption=self.encryptor,
724 max_volume_size=max_volume_size,
725 new_volume_handler=new_volume_handler,
726 save_to_members=False,
728 os.chdir(source_path)
730 # for each file to be in the backup, do:
731 for path in self._recursive_walk_dir('.'):
732 # calculate stat dict for current file
733 statd = self._stat_dict(path)
734 statd['path'] = u'snapshot://' + statd['path']
735 statd['volume'] = self.vol_no
738 tarobj.add(path, arcname = statd['path'], recursive=False)
740 # retrieve file offset
741 statd['offset'] = tarobj.get_last_member_offset()
742 self.logger.debug("backup %s" % statd['path'])
744 # store the stat dict in the index
745 s = bytes(json.dumps(statd) + '\n', 'UTF-8')
746 crc = binascii.crc32(s, crc) & 0xffffffff
749 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
750 crc = binascii.crc32(s, crc) & 0xffffffff
752 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
757 index_sink.close (close_fileobj=True)
759 def create_diff_backup(self, source_path, backup_path, previous_index_path,
760 max_volume_size=None, extra_data=dict()):
765 - source_path: source path to the directory to back up.
766 - backup_path: path where the back up will be stored. Backup path will
767 be created if not existent.
768 - previous_index_path: index of the previous backup, needed to know
769 which files changed since then.
770 - max_volume_size: maximum volume size in megabytes (MB). Used to split
771 the backup in volumes. Optional (won't split in volumes by default).
773 NOTE: previous index is assumed to follow exactly the same format as
774 the index_mode setup in the constructor.
776 # check/sanitize input
777 if not isinstance(source_path, str):
778 raise Exception('Source path must be a string')
780 if not isinstance(backup_path, str):
781 raise Exception('Backup path must be a string')
783 if not os.path.exists(source_path) or not os.path.isdir(source_path):
784 raise Exception('Source path "%s" does not exist or is not a '\
785 'directory' % source_path)
787 if not isinstance(extra_data, dict):
788 raise Exception('extra_data must be a dictionary')
791 extra_data_str = json.dumps(extra_data)
793 raise Exception('extra_data is not json-serializable')
795 if not os.access(source_path, os.R_OK):
796 raise Exception('Source path "%s" is not readable' % source_path)
798 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
799 max_volume_size < 1):
800 raise Exception('max_volume_size must be a positive integer')
801 if max_volume_size != None:
802 max_volume_size = max_volume_size*1024*1024
804 if not isinstance(previous_index_path, str):
805 raise Exception('previous_index_path must be A string')
807 if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
808 raise Exception('Index path "%s" does not exist or is not a '\
809 'file' % previous_index_path)
811 if not os.access(previous_index_path, os.R_OK):
812 raise Exception('Index path "%s" is not readable' % previous_index_path)
814 # try to create backup path if needed
815 if not os.path.exists(backup_path):
816 os.makedirs(backup_path)
818 if not os.access(backup_path, os.W_OK):
819 raise Exception('Backup path "%s" is not writeable' % backup_path)
821 if source_path.endswith('/'):
822 source_path = source_path[:-1]
824 if backup_path.endswith('/'):
825 backup_path = backup_path[:-1]
827 # update current time
828 self.current_time = datetime.datetime.now()
830 if self.mode not in self.__file_extensions_dict:
831 raise Exception('Unrecognized extension')
833 # setup for encrypting payload
834 if self.encryptor is None:
835 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
837 # some initialization
840 # generate the first volume name
841 vol_name = self.volume_name_func(backup_path, is_full=False,
843 tarfile_path = os.path.join(backup_path, vol_name)
848 index_name = self.index_name_func(is_full=False)
849 index_path = os.path.join(backup_path, index_name)
850 index_sink = self.open_auxiliary_file(index_path, 'w')
852 def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
854 Handles the new volumes
856 volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
857 volume_number=volume_number)
858 volume_path = os.path.join(backup_path, volume_name)
859 deltarobj.vol_no = volume_number
861 # we convert relative paths into absolute because CWD is changed
862 if not os.path.isabs(volume_path):
863 volume_path = os.path.join(cwd, volume_path)
865 deltarobj.logger.debug("opening volume %s" % volume_path)
866 tarobj.open_volume(volume_path)
868 # wraps some args from context into the handler
869 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
871 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
873 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
874 # calculate checksum and write into the stream
875 crc = binascii.crc32(s) & 0xFFFFffff
878 # start creating the tarfile
879 tarobj = tarfile.TarFile.open(tarfile_path,
880 mode='w' + self.mode,
881 format=tarfile.GNU_FORMAT,
882 concat='#' in self.mode,
883 encryption=self.encryptor,
884 max_volume_size=max_volume_size,
885 new_volume_handler=new_volume_handler,
886 save_to_members=False,
890 # create the iterators, first the previous index iterator, then the
891 # source path directory iterator and collate and iterate them
892 if not os.path.isabs(previous_index_path):
893 previous_index_path = os.path.join(cwd, previous_index_path)
894 index_it = self.iterate_index_path(previous_index_path)
896 os.chdir(source_path)
897 dir_it = self._recursive_walk_dir('.')
898 dir_path_it = self.jsonize_path_iterator(dir_it)
906 # for each file to be in the backup, do:
907 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
909 # if file is not in the index, it means it's a new file, so we have
914 # if the file is not in the directory iterator, it means that it has
915 # been deleted, so we need to mark it as such
918 # if the file is in both iterators, it means it might have either
919 # not changed (in which case we will just list it in our index but
920 # it will not be included in the tar file), or it might have
921 # changed, in which case we will snapshot it.
922 elif ipath and dpath:
923 if self._equal_stat_dicts(ipath, dpath):
927 # TODO: when creating chained backups (i.e. diffing from another
928 # diff), we will need to detect the type of action in the previous
929 # index, because if it was delete and dpath is None, we should
932 if action == 'snapshot':
933 # calculate stat dict for current file
935 stat['path'] = "snapshot://" + dpath['path']
936 stat['volume'] = self.vol_no
938 self.logger.debug("[STORE] %s" % dpath['path'])
941 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
943 # retrieve file offset
944 stat['offset'] = tarobj.get_last_member_offset()
945 elif action == 'delete':
946 path = self.unprefixed(ipath['path'])
948 u'path': u'delete://' + path,
949 u'type': ipath['type']
951 self.logger.debug("[DELETE] %s" % path)
953 # mark it as deleted in the backup
954 tarobj.add("/dev/null", arcname=stat['path'])
955 elif action == 'list':
957 path = self.unprefixed(ipath['path'])
958 stat['path'] = u'list://' + path
959 # unchanged files do not enter in the backup, only in the index
960 self.logger.debug("[UNCHANGED] %s" % path)
963 self.logger.warning('unknown action in create_diff_backup: {0}'
968 # store the stat dict in the index
969 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
970 crc = binascii.crc32(s, crc) & 0xffffffff
973 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
974 crc = binascii.crc32(s, crc) & 0xffffffff
976 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
985 def iterate_index_path(self, index_path):
987 Returns an index iterator. Internally, it uses a classic iterator class.
988 We do that instead of just yielding so that the iterator object can have
989 an additional function to close the file descriptor that is opened in
993 class IndexPathIterator(object):
994 def __init__(self, delta_tar, index_path):
995 self.delta_tar = delta_tar
996 self.index_path = index_path
998 self.extra_data = dict()
1008 def __enter__(self):
1010 Allows this iterator to be used with the "with" statement
1013 self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
1014 # check index header
1015 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1016 if j.get("type", '') != 'python-delta-tar-index' or\
1017 j.get('version', -1) != 1:
1018 raise Exception("invalid index file format: %s" % json.dumps(j))
1020 self.extra_data = j.get('extra_data', dict())
1022 # find BEGIN-FILE-LIST, ignore other headers
1024 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1025 if j.get('type', '') == 'BEGIN-FILE-LIST':
1029 def __exit__(self, type, value, tb):
1031 Allows this iterator to be used with the "with" statement
1038 # read each file in the index and process it to do the restore
1042 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1043 except Exception as e:
1048 op_type = j.get('type', '')
1050 # when we detect the end of the list, break the loop
1051 if op_type == 'END-FILE-LIST':
1057 if op_type not in ['directory', 'file', 'link']:
1058 self.delta_tar.logger.warning('unrecognized type to be '
1059 'restored: %s, line %d' % (op_type, l_no))
1061 return self.__next__()
1065 return IndexPathIterator(self, index_path)
1067 def iterate_tar_path(self, tar_path, new_volume_handler=None):
1069 Returns a tar iterator that iterates jsonized member items that contain
1070 an additional "member" field, used by RestoreHelper.
1072 class TarPathIterator(object):
1073 def __init__(self, delta_tar, tar_path, new_volume_handler=None):
1074 self.delta_tar = delta_tar
1075 self.tar_path = tar_path
1077 self.last_member = None
1078 self.new_volume_handler = new_volume_handler
1086 self.tar_obj.close()
1088 def __enter__(self):
1090 Allows this iterator to be used with the "with" statement
1092 if self.tar_obj is None:
1094 if self.delta_tar.password is not None:
1095 decryptor = crypto.Decrypt \
1096 (password=self.delta_tar.password,
1097 key=self.delta_tar.crypto_key)
1098 self.tar_obj = tarfile.TarFile.open(self.tar_path,
1099 mode='r' + self.delta_tar.mode,
1100 format=tarfile.GNU_FORMAT,
1101 concat='#' in self.delta_tar.mode,
1102 encryption=decryptor,
1103 new_volume_handler=self.new_volume_handler,
1104 save_to_members=False,
1108 def __exit__(self, type, value, tb):
1110 Allows this iterator to be used with the "with" statement
1113 self.tar_obj.close()
1118 Read each member and return it as a stat dict
1120 tarinfo = self.tar_obj.__iter__().__next__()
1121 # NOTE: here we compare if tarinfo.path is the same as before
1122 # instead of comparing the tarinfo object itself because the
1123 # object itself might change for multivol tarinfos
1124 if tarinfo is None or (self.last_member is not None and\
1125 self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
1128 self.last_member = tarinfo
1131 if tarinfo.isfile():
1133 elif tarinfo.isdir():
1135 elif tarinfo.islnk() or tarinfo.issym():
1140 u'path': tarinfo.path,
1141 u'mode': tarinfo.mode,
1142 u'mtime': tarinfo.mtime,
1143 u'ctime': -1, # cannot restore
1144 u'uid': tarinfo.uid,
1145 u'gid': tarinfo.gid,
1146 u'inode': -1, # cannot restore
1147 u'size': tarinfo.size,
1151 return TarPathIterator(self, tar_path, new_volume_handler)
1153 def jsonize_path_iterator(self, iter, strip=0):
1155 converts the yielded items of an iterator into json path lines.
1157 strip: Strip the smallest prefix containing num leading slashes from
1162 path = iter.__next__()
1164 yield self._stat_dict(path), 0
1166 st = self._stat_dict(path)
1167 st['path'] = "/".join(path.split("/")[strip:])
1169 except StopIteration:
# Merge-join two ordered iterators, yielding (elem1, elem2, l_no) triples;
# an element is None when the other side has no match.  (Sampled listing:
# several lines, including yield1/yield2 assignment branches, are absent.)
1172 def collate_iterators(self, it1, it2):
1174 Collate two iterators, so that it returns pairs of the items of each
1175 iterator (if the items are the same), or (None, elem2) or (elem1, None)
1176 when there's no match for the items in the other iterator.
1178 It assumes that the items in both lists are ordered in the same way.
1181 elem1, elem2 = None, None
# advance it1; once it is exhausted, drain the remainder of it2 alone
1185 elem1, l_no = it1.__next__()
1186 except StopIteration:
1188 yield (None, elem2, l_no)
# it2 may yield either bare elements or (element, line_no) tuples
1190 if isinstance(elem2, tuple):
1192 yield (None, elem2, l_no)
# advance it2; once it is exhausted, drain the remainder of it1 alone
1196 elem2 = it2.__next__()
1197 if isinstance(elem2, tuple):
1199 except StopIteration:
1201 yield (elem1, None, l_no)
1202 for elem1, l_no in it1:
1203 yield (elem1, None, l_no)
# both sides present: order them by their unprefixed paths
1206 index1 = self.unprefixed(elem1['path'])
1207 index2 = self.unprefixed(elem2['path'])
1208 i1, i2 = self.compare_indexes(index1, index2)
1210 yield1 = yield2 = None
1217 yield (yield1, yield2, l_no)
# Three-way ordering of two '/'-separated index paths.
1219 def compare_indexes(self, index1, index2):
1221 Compare iterator indexes and return a tuple in the following form:
1222 if index1 < index2, returns (index1, None)
1223 if index1 == index2 returns (index1, index2)
1224 else: returns (None, index2)
# compare component-wise on '/'-split paths; 'length' encodes the depth
# difference between the two
1226 l1 = index1.split('/')
1227 l2 = index2.split('/')
1228 length = len(l2) - len(l1)
# NOTE(review): the 'if'/'elif' conditions guarding the following returns
# are absent from this sampled listing — consult the full source before
# relying on the exact ordering rules.
1231 return (index1, None)
1233 return (None, index2)
1235 for i1, i2 in zip(l1, l2):
1237 return (index1, None)
1239 return (None, index2)
1241 return (index1, index2)
# List the contents of a backup tar, calling list_func per member (or
# logging each path when list_func is None).  Supports encrypted and
# multivolume archives.  (Sampled listing: some lines are absent.)
1243 def list_backup(self, backup_tar_path, list_func=None):
1244 if not isinstance(backup_tar_path, str):
1245 raise Exception('Backup tar path must be a string')
1247 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1248 raise Exception('Source path "%s" does not exist or is not a '\
1249 'file' % backup_tar_path)
1251 if not os.access(backup_tar_path, os.R_OK):
1252 raise Exception('Source path "%s" is not readable' % backup_tar_path)
# nested handler: compute the next volume's path and open it on the tarobj
1256 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
1258 Handles the new volumes
1260 volume_name = deltarobj.volume_name_func(backup_path, True,
1261 volume_number, guess_name=True)
1262 volume_path = os.path.join(backup_path, volume_name)
1264 # we convert relative paths into absolute because CWD is changed
1265 if not os.path.isabs(volume_path):
1266 volume_path = os.path.join(cwd, volume_path)
1267 tarobj.open_volume(volume_path, encryption=encryption)
# lazily create the shared decryptor on first use
1269 if self.decryptor is None:
1270 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
1272 backup_path = os.path.dirname(backup_tar_path)
1273 if not os.path.isabs(backup_path):
1274 backup_path = os.path.join(cwd, backup_path)
# bind the handler's leading arguments; tarfile supplies the rest
1275 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)
1277 tarobj = tarfile.TarFile.open(backup_tar_path,
1278 mode='r' + self.mode,
1279 format=tarfile.GNU_FORMAT,
1280 concat='#' in self.mode,
1281 encryption=self.decryptor,
1282 new_volume_handler=new_volume_handler,
1283 save_to_members=False,
# per-member filter passed to extractall: logs the path when no
# list_func was supplied
1286 def filter(cls, list_func, tarinfo):
1287 if list_func is None:
1288 self.logger.info(tarinfo.path)
1292 filter = partial(filter, self, list_func)
1294 tarobj.extractall(filter=filter)
# Restore a backup into target_path, either straight from a tar file or
# via one or more JSON file indexes (diff restore).  (Sampled listing:
# several original lines are absent, so this pass documents in place.)
1297 def restore_backup(self, target_path, backup_indexes_paths=[],
# NOTE(review): backup_indexes_paths has a mutable default argument —
# safe only while this method never mutates it; confirm in full source.
1298 backup_tar_path=None, restore_callback=None,
1304 - target_path: path to restore.
1305 - backup_indexes_paths: path to backup indexes, in descending date order.
1306 The indexes indicate the location of their respective backup volumes,
1307 and multiple indexes are needed to be able to restore diff backups.
1308 Note that this is an optional parameter: if not suplied, it will
1309 try to restore directly from backup_tar_path.
1310 - backup_tar_path: path to the backup tar file. Used as an alternative
1311 to backup_indexes_paths to restore directly from a tar file without
1312 using any file index. If it's a multivol tarfile, volume_name_func
1314 - restore_callback: callback function to be called during restore.
1315 This is passed to the helper and gets called for every file.
1317 NOTE: If you want to use an index to restore a backup, this function
1318 only supports to do so when the tarfile mode is either uncompressed or
1319 uses concat compress mode, because otherwise it would be very slow.
1321 NOTE: Indices are assumed to follow the same format as the index_mode
1322 specified in the constructor.
1324 Returns the list of files that could not be restored, if there were
1327 # check/sanitize input
1328 if not isinstance(target_path, str):
1329 raise Exception('Target path must be a string')
# NOTE(review): this guard looks inverted — it compares the *tar path*
# against []; 'backup_indexes_paths == [] and backup_tar_path is None'
# was probably intended.  Flagged only (documentation pass).
1331 if backup_indexes_paths is None and backup_tar_path == []:
1332 raise Exception("You have to either provide index paths or a tar path")
# no indexes given: fall back to restoring straight from the tar file
1334 if len(backup_indexes_paths) == 0:
1340 if not isinstance(backup_tar_path, str):
1341 raise Exception('Backup tar path must be a string')
1343 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1344 raise Exception('Source path "%s" does not exist or is not a '\
1345 'file' % backup_tar_path)
1347 if not os.access(backup_tar_path, os.R_OK):
1348 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1350 if not isinstance(backup_indexes_paths, list):
1351 raise Exception('backup_indexes_paths must be a list')
# ':' / '|' modes are stream-compressed: seeking by index offset is not
# practical there, hence the restriction
1353 if self.mode.startswith(':') or self.mode.startswith('|'):
1354 raise Exception('Restore only supports either uncompressed tars'
1355 ' or concat compression when restoring from an index, and '
1356 ' the open mode you provided is "%s"' % self.mode)
1358 for index in backup_indexes_paths:
1359 if not isinstance(index, str):
1360 raise Exception('indices must be strings')
1362 if not os.path.exists(index) or not os.path.isfile(index):
1363 raise Exception('Index path "%s" does not exist or is not a '\
1366 if not os.access(index, os.R_OK):
1367 raise Exception('Index path "%s" is not readable' % index)
1369 # try to create backup path if needed
1370 if not os.path.exists(target_path):
1371 os.makedirs(target_path)
1373 # make backup_tar_path absolute so that iterate_tar_path works fine
1374 if backup_tar_path and not os.path.isabs(backup_tar_path):
1375 backup_tar_path = os.path.abspath(backup_tar_path)
# extraction happens relative to the restore target directory
1378 os.chdir(target_path)
1380 # setup for decrypting payload
1381 if self.decryptor is None:
1382 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
# tar mode: iterate the archive itself in place of an index
1385 index_it = self.iterate_tar_path(backup_tar_path)
1386 helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
1387 tarobj=index_it.tar_obj)
1388 elif mode == "diff":
1389 helper = RestoreHelper(self, cwd, backup_indexes_paths, disaster=disaster)
1391 # get iterator from newest index at _data[0]
1392 index1 = helper._data[0]["path"]
1393 index_it = self.iterate_index_path(index1)
1394 except tarfile.DecryptionError as exn:
1395 self.logger.error("failed to decrypt file [%s]: %s; is this an "
1396 "actual encrypted index file?"
1397 % (index1, str (exn)))
1398 return [(index1, exn)]
1399 except Exception as exn:
1401 self.logger.error("failed to read file [%s]: %s; is this an "
1402 "actual index file?" % (index1, str (exn)))
1403 return [(index1, exn)]
# walk the current target directory so it can be diffed against the index
1405 dir_it = self._recursive_walk_dir('.')
1406 dir_path_it = self.jsonize_path_iterator(dir_it)
1408 failed = [] # irrecoverable files
1410 # for each file to be restored, do:
1411 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
1413 upath = dpath['path']
1414 op_type = dpath['type']
1416 upath = self.unprefixed(ipath['path'])
1417 op_type = ipath['type']
# honour the configured include/exclude filters
1420 if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
1423 # if types of the file mismatch, the file needs to be deleted
1425 if ipath is not None and dpath is not None and\
1426 dpath['type'] != ipath['type']:
1427 helper.delete(upath)
1429 # if file not found in dpath, we can directly restore from index
1431 # if the file doesn't exist and it needs to be deleted, it
1432 # means that work is already done
1433 if ipath['path'].startswith('delete://'):
1436 self.logger.debug("restore %s" % ipath['path'])
1437 helper.restore(ipath, l_no, restore_callback)
1438 except Exception as e:
1439 iipath = ipath.get ("path", "")
1440 self.logger.error("FAILED to restore: {} ({})"
# in disaster (recovery) mode, collect failures instead of aborting
1442 if disaster is True:
1443 failed.append ((iipath, e))
1446 # if both files are equal, we have nothing to restore
1447 if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
1450 # we have to restore the file, but first we need to delete the
1451 # current existing file.
1452 # we don't delete the file if it's a directory, because it might
1453 # just have changed mtime, so it's quite inefficient to remove
1456 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
1457 helper.delete(upath)
1458 self.logger.debug("restore %s" % ipath['path'])
1460 helper.restore(ipath, l_no, restore_callback)
1461 except Exception as e:
1462 if disaster is False:
1464 failed.append ((ipath.get ("path", ""), e))
1467 # if the file is not in the index (so it comes from the target
1468 # directory) then we have to delete it
1470 self.logger.debug("delete %s" % upath)
1471 helper.delete(upath)
# directory metadata is applied last, once all contents exist
1473 helper.restore_directories_permissions()
# Best-effort recovery wrapper: delegates to restore_backup.  The
# disaster-mode argument it presumably forwards is on a line absent from
# this sampled listing — confirm against the full source.
1481 def recover_backup(self, target_path, backup_indexes_paths=[],
1482 restore_callback=None):
1484 Walk the index, extracting objects in disaster mode. Bad files are
1485 reported along with a reason.
1487 return self.restore_backup(target_path,
1488 backup_indexes_paths=backup_indexes_paths,
# Parse one UTF-8 JSON line from an index file, raising descriptive
# exceptions for binary/compressed/malformed input.
1492 def _parse_json_line(self, f, l_no):
1494 Read line from file like object and process it as JSON.
1499 j = json.loads(l.decode('UTF-8'))
1500 except UnicodeDecodeError as e:
# distinguish "still gzip-compressed" (GZ magic bytes) from generally
# binary input, for a clearer error message
1501 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1503 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1504 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1507 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1508 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1510 except ValueError as e:
1511 raise Exception("error parsing this json line "
1512 "(line number %d): %s" % (l_no, l))
# Helper that performs the per-file work of a restore: volume handling,
# extraction, deletion, and deferred directory-permission fixup.
1516 class RestoreHelper(object):
1518 Class used to help to restore files from indices
1521 # holds the dicts of data
1528 # list of directories to be restored. This is done as a last step, see
1529 # tarfile.extractall for details.
# Build per-index state dicts in self._data, one per index (diff mode) or
# a single one for direct tar restore.  (Sampled listing: the state-dict
# construction is only partially visible.)
1534 def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
1535 tarobj=None, disaster=False):
1537 Constructor opens the tars and init the data structures.
1541 - Index list must be provided in reverse order (newer first).
1542 - “newer first” apparently means that if there are n backups
1543 provided, the last full backup is at index n-1 and the most recent
1544 diff backup is at index 0.
1545 - Only the first, the second, and the last elements of
1546 ``index_list`` are relevant, others will not be accessed.
1547 - If no ``index_list`` is provided, both ``tarobj`` and
1548 ``backup_path`` must be passed.
1549 - If ``index_list`` is provided, the values of ``tarobj`` and
1550 ``backup_path`` are ignored.
1553 self._directories = []
1554 self._deltatar = deltatar
1556 self._password = deltatar.password
1557 self._crypto_key = deltatar.crypto_key
1558 self._decryptors = []
1559 self._disaster = disaster
# ownership can only be restored when running as root (and the 'pwd'
# module is available, i.e. on POSIX)
1566 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1567 self.canchown = True
1569 self.canchown = False
1571 if index_list is not None:
1572 for index in index_list:
# by convention the last list entry is the full backup
1573 is_full = index == index_list[-1]
# one decryptor per index so volumes can be decrypted independently
1576 if self._password is not None:
1577 decryptor = crypto.Decrypt (password=self._password,
1578 key=self._crypto_key)
1580 # make paths absolute to avoid cwd problems
1581 if not os.path.isabs(index):
1582 index = os.path.normpath(os.path.join(cwd, index))
1592 last_itelement = None,
1594 new_volume_handler = partial(self.new_volume_handler,
1595 self._deltatar, self._cwd, is_full,
1596 os.path.dirname(index), decryptor),
1597 decryptor = decryptor
1599 self._data.append(s)
1601 # make paths absolute to avoid cwd problems
1602 if not os.path.isabs(backup_path):
1603 backup_path = os.path.normpath(os.path.join(cwd, backup_path))
1605 # update the new_volume_handler of tar_obj
1606 tarobj.new_volume_handler = partial(self.new_volume_handler,
1607 self._deltatar, self._cwd, True, os.path.dirname(backup_path),
1608 self._deltatar.decryptor)
1617 last_itelement = None,
1619 new_volume_handler = tarobj.new_volume_handler,
1620 decryptor = self._deltatar.decryptor
1622 self._data.append(s)
# NOTE(review): fragment of the cleanup method (def line absent from this
# sampled listing).  Closes every per-index volume descriptor and tar
# object and nulls the references so they are not reused.
1627 Closes all open files
1629 for data in self._data:
1631 data['vol_fd'].close()
1632 data['vol_fd'] = None
1634 data['tarobj'].close()
1635 data['tarobj'] = None
# Remove a path from the restore target while preserving the parent
# directory's mtime across the deletion.
1637 def delete(self, path):
# nothing to do if it is already gone
1641 if not os.path.exists(path):
1644 # to preserve parent directory mtime, we save it
1645 parent_dir = os.path.dirname(path) or os.getcwd()
1646 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
# real directories (not symlinks to them) need recursive removal; the
# actual removal calls are on lines absent from this sampled listing
1648 if os.path.isdir(path) and not os.path.islink(path):
1653 # now we restore parent_directory mtime
1654 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
# Restore one index entry, walking from the newest index (_data[0]) to
# the full backup (_data[1]) to find the snapshot to extract.
1656 def restore(self, itpath, l_no, callback=None):
1658 Restore the path from the appropriate backup. Receives the current path
1659 from the newest (=first) index iterator. itpath must be not null.
1660 callback is a custom function that gets called for every file.
1662 NB: This function takes the attribute ``_data`` as input but will only
1663 ever use its first and, if available, second element. Anything else in
1664 ``._data[]`` will be ignored.
1666 path = itpath['path']
1668 # Calls the callback function
# delete:// entries were already handled by restore_backup
1672 if path.startswith('delete://'):
1673 # the file has previously been deleted already in restore_backup in
1674 # all cases so we just need to finish
1677 # get data from newest index (_data[0])
1678 data = self._data[0]
1679 upath = self._deltatar.unprefixed(path)
1681 # to preserve parent directory mtime, we save it
1682 parent_dir = os.path.dirname(upath) or os.getcwd()
1683 if not os.path.exists(parent_dir):
1684 os.makedirs(parent_dir)
1685 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1687 # if path is found in the newest index as to be snapshotted, deal with it
1689 if path.startswith('snapshot://'):
1691 self.restore_file(itpath, data, path, l_no, upath)
1695 # now we restore parent_directory mtime
1696 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1699 # we go from index to index, finding the path in the index, then finding
1700 # the index with the most recent snapshot of the file being restored
1702 # Right now we support diff backups, only. No incremental backups.
1703 # As a result _data[0] is always the diff backup index
1704 # and _data[1] the full backup index.
1705 if len(self._data) == 2:
1706 data = self._data[1]
1707 d, l_no, dpath = self.find_path_in_index(data, upath)
1709 self._deltatar.logger.warning('Error restoring file %s from '
1710 'index, not found in index %s' % (path, data['path']))
1713 cur_path = d.get('path', '')
# found in the full index but marked deleted there: inconsistency
1714 if cur_path.startswith('delete://'):
1715 self._deltatar.logger.warning(('Strange thing happened, file '
1716 '%s was listed in first index but deleted by another '
1717 'one. Path was ignored and untouched.') % path)
1719 elif cur_path.startswith('snapshot://'):
1720 # this code path is reached when the file is unchanged
1721 # in the newest index and therefore of type 'list://'
1722 self.restore_file(d, data, path, l_no, dpath)
1724 # now we restore parent_directory mtime
1725 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1728 # error code path is reached when:
1729 # a) we have more than two indexes (unsupported atm)
1730 # b) both indexes contain a list:// entry (logic error)
1731 # c) we have just one index and it also contains list://
1732 self._deltatar.logger.warning(('Error restoring file %s from index, '
1733 'snapshot not found in any index') % path)
# Scan an index's ordered iterator for upath, resuming from and saving
# the last position in data['last_itelement']/['last_lno'] so repeated
# lookups do not restart from the top.  (Sampled listing: loop structure
# and some return lines are absent.)
1735 def find_path_in_index(self, data, upath):
1736 # NOTE: we restart the iterator sometimes because the iterator can be
1737 # walked over completely multiple times, for example if one path if not
1738 # found in one index and we have to go to the next index.
1739 it = data['iterator']
# exhausted or unset iterator: reopen the index from the start
1741 it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
1742 d, l_no = it.__next__()
# otherwise resume from the previously saved element
1744 d = data['last_itelement']
1745 l_no = data['last_lno']
1748 dpath = self._deltatar.unprefixed(d.get('path', ''))
# exact hit: remember the position and return the element
1750 data['last_itelement'] = d
1751 data['last_lno'] = l_no
1752 return d, l_no, dpath
1754 up, dp = self._deltatar.compare_indexes(upath, dpath)
1755 # any time upath should have appeared before current dpath, it means
1756 # upath is just not in this index and we should stop
1758 data['last_itelement'] = d
1759 data['last_lno'] = l_no
1763 d, l_no = it.__next__()
1764 except StopIteration:
1765 data['last_itelement'] = d
1766 data['last_lno'] = l_no
# Final pass: apply mode, mtime and (when running as root) ownership to
# the directories queued by add_member_dir, deepest paths first so that
# touching children does not clobber parents already processed.
1769 def restore_directories_permissions(self):
1771 Restore directory permissions when everything have been restored
# sort by name then reverse => descend-first order (same trick as
# tarfile.extractall)
1778 self._directories.sort(key=operator.attrgetter('name'))
1779 self._directories.reverse()
1781 # Set correct owner, mtime and filemode on directories.
1782 for member in self._directories:
1783 dirpath = member.name
1785 os.chmod(dirpath, member.mode)
1786 os.utime(dirpath, (member.mtime, member.mtime))
1788 # We have to be root to do so.
# resolve symbolic owner names to numeric ids; fallback branches are on
# lines absent from this sampled listing
1790 g = grp.getgrnam(member.gname)[2]
1794 u = pwd.getpwnam(member.uname)[2]
# lchown for symlinked entries so the link itself is changed
1798 if member.issym and hasattr(os, "lchown"):
1799 os.lchown(dirpath, u, g)
1801 os.chown(dirpath, u, g)
1802 except EnvironmentError:
1803 raise tarfile.ExtractError("could not change owner")
# permission problems are demoted to warnings; restore continues
1805 except tarfile.ExtractError as e:
1806 self._deltatar.logger.warning('tarfile: %s' % e)
# Volume-change callback bound via functools.partial in __init__;
# presumably a @staticmethod (the decorator line is absent from this
# sampled listing — it takes no self).  Computes the next volume's path
# and opens it on the running tar object.
1809 def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
1811 Handles the new volumes
1813 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1814 volume_number, guess_name=True)
1815 volume_path = os.path.join(backup_path, volume_name)
1817 # we convert relative paths into absolute because CWD is changed
1818 if not os.path.isabs(volume_path):
1819 volume_path = os.path.join(cwd, volume_path)
1820 tarobj.open_volume(volume_path, encryption=encryption)
# Extract one snapshot from a specific backup: select the right volume,
# seek to the stored offset, reopen the tar there, and extract the member
# under its unprefixed path.  (Sampled listing: some lines are absent.)
1822 def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
1824 Restores a snapshot of a file from a specific backup
1826 op_type = file_data.get('type', -1)
1827 member = file_data.get('member', None)
1828 ismember = bool(member)
1830 # when member is set, then we can assume everything is right and we
1831 # just have to restore the path
1833 vol_no = file_data.get('volume', -1)
# a missing/negative volume number means the index entry is unusable
1835 if not isinstance(vol_no, int) or vol_no < 0:
1836 self._deltatar.logger.warning('unrecognized type to be restored: '
1837 '%s, line %d' % (op_type, l_no))
1839 # setup the volume that needs to be read. only needed when member is
# switch the open volume file descriptor when the entry lives in a
# different volume than the one currently open
1841 if index_data['curr_vol_no'] != vol_no:
1842 index_data['curr_vol_no'] = vol_no
1843 backup_path = os.path.dirname(index_data['path'])
1844 vol_name = self._deltatar.volume_name_func(backup_path,
1845 index_data['is_full'], vol_no, guess_name=True)
1846 vol_path = os.path.join(backup_path, vol_name)
1847 if index_data['vol_fd']:
1848 index_data['vol_fd'].close()
1849 index_data['vol_fd'] = open(vol_path, 'rb')
1851 # force reopen of the tarobj because of new volume
1852 if index_data['tarobj']:
1853 index_data['tarobj'].close()
1854 index_data['tarobj'] = None
1856 # seek tarfile if needed
1857 offset = file_data.get('offset', -1)
# fast path: if the already-open tarobj's next member is the wanted one,
# avoid a seek-and-reopen; decryption/compression errors just force it
1858 if index_data['tarobj']:
1860 member = index_data['tarobj'].__iter__().__next__()
1861 except tarfile.DecryptionError:
1863 except tarfile.CompressionError:
1866 if not member or member.path != file_data['path']:
1867 # force a seek and reopen
1868 index_data['tarobj'].close()
1869 index_data['tarobj'] = None
1871 # open the tarfile if needed
1872 if not index_data['tarobj']:
1873 index_data['vol_fd'].seek(offset)
1874 index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
1875 fileobj=index_data['vol_fd'],
1876 format=tarfile.GNU_FORMAT,
1877 concat='#' in self._deltatar.mode,
1878 encryption=index_data["decryptor"],
1879 new_volume_handler=index_data['new_volume_handler'],
1880 save_to_members=False,
# tolerant mode lets disaster recovery skim past damaged objects
1881 tolerant=self._disaster)
1883 member = index_data['tarobj'].__iter__().__next__()
# extract under the unprefixed (real target) path
1885 member.path = unprefixed_path
1886 member.name = unprefixed_path
1888 if op_type == 'directory':
# queue the real directory metadata for the final permission pass, then
# extract a copy with restrictive 0700 perms for the time being
1889 self.add_member_dir(member)
1890 member = copy.copy(member)
1891 member.mode = 0o0700
1893 # if it's an existing directory, we then don't need to recreate it
1894 # just set the right permissions, mtime and that kind of stuff
1895 if os.path.exists(member.path):
1899 # set current volume number in tarobj, otherwise the extraction of the
1900 # file might fail when trying to extract a multivolume member
1901 index_data['tarobj'].volume_number = index_data['curr_vol_no']
# symlinks are not restored; extraction logs and skips them instead
1903 def ignore_symlink (member, *_args):
1904 self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
1906 # finally, restore the file
1907 index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)
# Queue a directory member for the final restore_directories_permissions
# pass; ownership fields are only recorded when we can actually chown
# (i.e. running as root, see self.canchown in __init__).
1909 def add_member_dir(self, member):
1911 Add member dir to be restored at the end
1913 if not self.canchown:
1914 self._directories.append(DirItem(name=member.name, mode=member.mode,
1915 mtime=member.mtime))
1917 self._directories.append(DirItem(name=member.name, mode=member.mode,
1918 mtime=member.mtime, gname=member.gname, uname=member.uname,
1919 uid=member.uid, gid=member.gid, issym=member.issym()))
1921 class DirItem(object):
1922 def __init__(self, **kwargs):
1923 for k, v in kwargs.items():