3 # Copyright (C) 2013, 2014 Intra2net AG
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published
7 # by the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see
17 # <http://www.gnu.org/licenses/lgpl-3.0.html>
# Format-version tags for the deltatar encryption header and its parameter
# set; used below as the constructor defaults for crypto_version /
# crypto_paramversion.
19 DELTATAR_HEADER_VERSION = 1
20 DELTATAR_PARAMETER_VERSION = 1
33 from functools import partial
# NOTE(review): the listing is elided here (source line numbers jump), so the
# body of emit() is not visible. Presumably a no-op handler installed on the
# "deltatar.DeltaTar" logger so library users without a logging configuration
# do not get "no handlers could be found" warnings — TODO confirm.
38 class NullHandler(logging.Handler):
39     def emit(self, record):
43 logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
51 # encryption direction
52 CRYPTO_MODE_ENCRYPT = 0
53 CRYPTO_MODE_DECRYPT = 1
55 # The canonical extension for encrypted backup files regardless of the actual
56 # encryption parameters is “.pdtcrypt”. This is analogous to the encryption
57 # header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
58 # Since the introduction of the versioned header there is no longer any need
59 # for encoding encryption parameters in the file extensions (“.aes128” and
61 PDTCRYPT_EXTENSION = "pdtcrypt"
# Roles of the auxiliary (non-payload) files; open_auxiliary_file() maps these
# to distinct fixed AES-GCM IV counters (AES_GCM_IV_CNT_INDEX / _INFOFILE).
65 AUXILIARY_FILE_INDEX = 0
66 AUXILIARY_FILE_INFO = 1
68 class DeltaTar(object):
70     Backup class used to create backups
# NOTE(review): the class-attribute section below is elided in this listing —
# the attribute assignments that the following comments describe, and most
# entries of the two extension dicts, are not visible here.
73     # list of files to exclude in the backup creation or restore operation. It
74     # can contain python regular expressions.
77     # list of files to include in the backup creation or restore operation. It
78     # can contain python regular expressions. If empty, all files in the source
79     # path will be backed up (when creating a backup) or all the files in the
80     # backup will be restored (when restoring a backup), but if included_files
81     # is set then only the files included in the list will be processed.
84     # custom filter of files to be backed up (or restored). Unused and unset
85     # by default. The function receives a file path and must return a boolean.
88     # mode in which the delta will be created (when creating a backup) or
89     # opened (when restoring). Accepts modes analog to the tarfile library.
92     # used together with aes modes to encrypt and decrypt backups.
97     # parameter version to use when encrypting; note that this has no effect
98     # on decryption since the required settings are determined from the headers
99     crypto_version = DELTATAR_HEADER_VERSION
100     crypto_paramversion = None
102     # when encrypting or decrypting, these hold crypto handlers; created before
103     # establishing the Tarfile stream iff a password is supplied.
107     # python logger object.
110     # specifies the index mode in the same format as @param mode, but without
111     # the ':', '|' or '#' at the beginning. It doesn't make sense to specify
112     # that the index is encrypted if no password is given in the constructor.
115     # current time for this backup. Used for file names and file creation checks
118     # extra data to be included in the header of the index file when creating a
122     # valid tarfile modes and their corresponding default file extension
123     __file_extensions_dict = {
132         '#gz.pdtcrypt': '.gz',
137     # valid index modes and their corresponding default file extension
138     __index_extensions_dict = {
142         'gz.pdtcrypt': '.gz',
146     # valid path prefixes
147     __path_prefix_list = [
# NOTE(review): excluded_files=[] / included_files=[] and the dict() defaults
# elsewhere are mutable default arguments — shared across calls if mutated.
# Consider None sentinels in a future behavioral change.
153     def __init__(self, excluded_files=[], included_files=[],
154                  filter_func=None, mode="", password=None,
155                  crypto_key=None, nacl=None,
156                  crypto_version=DELTATAR_HEADER_VERSION,
157                  crypto_paramversion=DELTATAR_PARAMETER_VERSION,
158                  logger=None, index_mode=None, index_name_func=None,
159                  volume_name_func=None):
161         Constructor. Configures the diff engine.
164         - excluded_files: list of files to exclude in the backup creation or
165           restore operation. It can contain python regular expressions.
167         - included_files: list of files to include in the backup creation or
168           restore operation. It can contain python regular expressions. If
169           empty, all files in the source path will be backed up (when creating a
170           backup) or all the files in the backup will be restored (when
171           restoring a backup), but if included_files is set then only the files
172           included in the list will be processed.
174         - filter_func: custom filter of files to be backed up (or restored).
175           Unused and unset by default. The function receives a file path and
176           must return a boolean.
178         - mode: mode in which the delta will be created (when creating a backup)
179           or opened (when restoring). Accepts the same modes as the tarfile
180           library. Valid modes are:
183            ':'             open uncompressed
184            ':gz'           open with gzip compression
185            ':bz2'          open with bzip2 compression
186            '|'             open an uncompressed stream of tar blocks
187            '|gz'           open a gzip compressed stream of tar blocks
188            '|bz2'          open a bzip2 compressed stream of tar blocks
189            '#gz'           open a stream of gzip compressed tar blocks
191         - crypto_key: used to encrypt and decrypt backups. Encryption will
192           be enabled automatically if a key is supplied. Requires a salt to be
195         - nacl: salt that was used to derive the encryption key for embedding
196           in the PDTCRYPT header. Not needed when decrypting and when
197           encrypting with password.
199         - password: used to encrypt and decrypt backups. Encryption will be
200           enabled automatically if a password is supplied.
202         - crypto_version: version of the format, determining the kind of PDT
205         - crypto_paramversion: optionally request encryption conforming to
206           a specific parameter version. Defaults to the standard PDT value
207           which as of 2017 is the only one available.
209         - logger: python logger object. Optional.
211         - index_mode: specifies the index mode in the same format as @param
212           mode, but without the ':', '|' or '#' at the beginning. If encryption
213           is requested it will extend to the auxiliary (index, info) files as
214           well. This is an optional parameter that will automatically mimic
215           @param mode by default if not provided. Valid modes are:
218            'gz'          open with gzip compression
219            'bz2'         open with bzip2 compression
221         - index_name_func: function that sets a custom name for the index file.
222           This function receives a flag to indicate whether the name will be
223           used for a full or diff backup. The backup path will be prepended to
226         - volume_name_func: function that defines the name of tar volumes. It
227           receives the backup_path, if it's a full backup and the volume number,
228           and must return the name for the corresponding volume name. Optional,
229           DeltaTar has default names for tar volumes.
232         if mode not in self.__file_extensions_dict:
233             raise Exception('Unrecognized extension mode=[%s] requested for files'
236         self.excluded_files = excluded_files
237         self.included_files = included_files
238         self.filter_func = filter_func
239         self.logger = logging.getLogger('deltatar.DeltaTar')
# NOTE(review): the guard on line 240 is elided; as shown, addHandler() is
# handed the *logger* parameter, while logging.Logger.addHandler expects a
# logging.Handler instance — confirm the intended type of `logger`.
241             self.logger.addHandler(logger)
244         if crypto_key is not None:
245             self.crypto_key = crypto_key
246             self.nacl = nacl # encryption only
248         if password is not None:
249             self.password = password
251         if crypto_version is not None:
252             self.crypto_version = crypto_version
254         if crypto_paramversion is not None:
255             self.crypto_paramversion = crypto_paramversion
257         # generate index_mode
258         if index_mode is None:
# NOTE(review): this branch validates `mode` against the *index* extension
# table rather than validating `index_mode` — looks suspicious; confirm.
264         elif mode not in self.__index_extensions_dict:
265             raise Exception('Unrecognized extension mode=[%s] requested for index'
268         self.index_mode = index_mode
269         self.current_time = datetime.datetime.now()
271         if index_name_func is not None:
272             self.index_name_func = index_name_func
274         if volume_name_func is not None:
275             self.volume_name_func = volume_name_func
# Returns the file extension for archive or index files; appends
# ".pdtcrypt" when encryption (key or password) is configured.
# NOTE(review): lines are elided here — the initial assignment of `ret` and
# the final return statement are not visible in this listing.
277     def pick_extension(self, kind, mode=None):
279         Choose the extension depending on a) the kind of file given, b) the
280         processing mode, and c) the current encryption settings.
283         if kind == PDT_TYPE_ARCHIVE:
286             mode = self.__index_extensions_dict [self.index_mode]
288         if self.crypto_key is not None or self.password is not None:
289             ret += "." + PDTCRYPT_EXTENSION
# Default index-file name: "<bfull|bdiff>-YYYY-MM-DD-HHMM.index<ext>".
# Overridable via the constructor's index_name_func parameter (hence the
# pylint method-hidden suppression).
292     def index_name_func(self, is_full): # pylint: disable=method-hidden
294         Callback for setting a custom name for the index file. Depending on
295         whether *is_full* is set, it will create a suitable name for a full
298         prefix = "bfull" if is_full else "bdiff"
299         date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
300         extension = self.pick_extension \
302                      self.__index_extensions_dict [self.index_mode])
304         return "%s-%s.index%s" % (prefix, date_str, extension)
# Default tar-volume name: "<bfull|bdiff>-YYYY-MM-DD-HHMM-NNN<ext>" (volume
# numbers are 1-based in the file name). When guessing, scans backup_path for
# a file matching the prefix/postfix instead of using the current time.
# NOTE(review): elided lines hide the guess_name parameter default and the
# branch structure between the two code paths below.
306     def volume_name_func(self, backup_path, # pylint: disable=method-hidden
307                          is_full, volume_number,
310         function that defines the name of tar volumes. It receives the
311         backup_path, if it's a full backup and the volume number, and must return
312         the name for the corresponding volume name. Optional, DeltaTar has default
313         names for tar volumes.
315         If guess_name is activated, the file is intended not to be created but
316         to be found, and thus the date will be guessed.
318         prefix = "bfull" if is_full else "bdiff"
319         extension = self.pick_extension \
321                      self.__file_extensions_dict [self.mode])
324             date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
325             return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
327             prefix = prefix + "-"
328             postfix = "-%03d%s" % (volume_number + 1, extension)
329             for f in os.listdir(backup_path):
330                 if f.startswith(prefix) and f.endswith(postfix):
332             raise Exception("volume not found")
# Decide whether *path* participates in the operation. Returns one of the
# module-level match constants (NO_MATCH / MATCH / PARENT_MATCH — definitions
# not visible in this listing). PARENT_MATCH marks a directory that is not
# itself included but is an ancestor of an included entry.
335     def filter_path(self, path, source_path="", is_dir=None):
337         Filters a path, given the source_path, using the filtering properties
338         set in the constructor.
339         The filtering order is:
340         1. included_files (if any)
342         3. filter_func (which must return whether the file is accepted or not)
345         if len(source_path) > 0:
346             # ensure that exactly one '/' at end of dir is also removed
347             source_path = source_path.rstrip(os.sep) + os.sep
348             path = path[len(source_path):]
350         # 1. filter included_files
352         if len(self.included_files) > 0:
354             for i in self.included_files:
355                 # it can be either a regexp or a string
356                 if isinstance(i, str):
357                     # if the string matches, then continue
362                     # if the string ends with / it's a directory, and if the
363                     # path is contained in it, it is included
364                     if i.endswith('/') and path.startswith(i):
368                     # if the string doesn't end with /, add it and do the same
370                     elif path.startswith(i + '/'):
374                     # check for PARENT_MATCH
377                         if not dir_path.endswith('/'):
380                         if i.startswith(dir_path):
383                 # if it's a reg exp, then we just check if it matches
# NOTE(review): re._pattern_type was removed in Python 3.7; modern code uses
# re.Pattern — confirm the targeted Python version.
384                 elif isinstance(i, re._pattern_type):
389                     self.logger.warning('Invalid pattern in included_files: %s' % str(i))
391             if match == NO_MATCH:
394         # when a directory is in PARENT_MATCH, it doesn't matter if it's
395         # excluded. Its subfiles will be excluded, but the directory itself
397         if match != PARENT_MATCH:
398             for e in self.excluded_files:
399                 # it can be either a regexp or a string
400                 if isinstance(e, str):
401                     # if the string matches, then exclude
405                     # if the string ends with / it's a directory, and if the
406                     # path starts with the directory, then exclude
407                     if e.endswith('/') and path.startswith(e):
410                     # if the string doesn't end with /, do the same check with
412                     elif path.startswith(e + '/'):
415                 # if it's a reg exp, then we just check if it matches
416                 elif isinstance(e, re._pattern_type):
420                     self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
# 3. delegate the final decision to the user-supplied filter, if any
423             return self.filter_func(path)
# Generator: breadth-first walk of source_path yielding relative child paths
# in sorted order, applying filter_path() to each entry. Directories that
# match (or are parents of matches) are queued for further descent.
427     def _recursive_walk_dir(self, source_path, keep_base_dir=False):
429         Walk a directory recursively, yielding each file/directory
432         source_path = source_path.rstrip(os.sep)
437             beginning_size = len(source_path) + 1 # +1 for os.sep
439         queue = [source_path]
# NOTE(review): list.pop(0) is O(n); collections.deque.popleft() would be
# O(1) — only relevant for very deep/wide trees.
442             cur_path = queue.pop(0)
444             # it might have been removed in the mean time
445             if not os.path.exists(cur_path):
448             for filename in sorted(os.listdir(cur_path)):
449                 child = os.path.join(cur_path, filename)
450                 is_dir = os.path.isdir(child)
451                 status = self.filter_path(child, source_path, is_dir)
452                 if status == NO_MATCH:
454                 if not os.access(child, os.R_OK):
455                     self.logger.warning('Error accessing possibly locked file %s' % child)
# yield the path relative to source_path (strip the base-dir prefix)
459                     yield child[beginning_size:]
461                 if is_dir and (status == MATCH or status == PARENT_MATCH):
# Build the per-file metadata dict that is written to the index and later
# compared by _equal_stat_dicts(). The 'type' assignments and the dict
# literal's opening lines are elided in this listing.
464     def _stat_dict(self, path):
466         Returns a dict with the stat data used to compare files
# NOTE(review): os.stat() follows symlinks, so S_ISLNK() on its st_mode can
# never be true; os.lstat() may be intended here — confirm.
468         stinfo = os.stat(path)
469         mode = stinfo.st_mode
472         if stat.S_ISDIR(mode):
474         elif stat.S_ISREG(mode):
476         elif stat.S_ISLNK(mode):
483             u'mtime': int(stinfo.st_mtime),
484             u'ctime': int(stinfo.st_ctime),
485             u'uid': stinfo.st_uid,
486             u'gid': stinfo.st_gid,
487             u'inode': stinfo.st_ino,
488             u'size': stinfo.st_size
# Compare two index entries on the restored stat keys; path comparison goes
# through prefixed() so list:// and snapshot:// entries can compare equal
# when listsnapshot_equal is set.
491     def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
493         Return if the dicts are equal in the stat keys
495         keys = [u'type', u'mode',u'size', u'mtime',
496         # not restored: u'inode', u'ctime'
499         # only if user is root, then also check gid/uid. otherwise do not check it,
500         # because tarfile can chown in case of being superuser only
502         # also, skip the check in rpmbuild since the sources end up with the
503         # uid:gid of the packager while the extracted files are 0:0.
504         if hasattr(os, "geteuid") and os.geteuid() == 0 \
505                 and os.getenv ("RPMBUILD_OPTIONS") is None:
# NOTE(review): PEP 8 prefers `is not None` over `!= None` here.
509         if (not d1 and d2 != None) or (d1 != None and not d2):
512         if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
# NOTE(review): local `type` shadows the builtin of the same name.
515         type = d1.get('type', '')
518             # size doesn't matter for directories
519             if type == 'directory' and key == 'size':
521             if d1.get(key, -1) != d2.get(key, -2):
# Ensure *path* carries one of the known scheme prefixes; unprefixed paths
# get "snapshot://". With listsnapshot_equal, a "list://" prefix is rewritten
# to "snapshot://" so list and snapshot entries compare equal.
525     def prefixed(self, path, listsnapshot_equal=False):
527         if a path is not prefixed, return it prefixed
529         for prefix in self.__path_prefix_list:
530             if path.startswith(prefix):
531                 if listsnapshot_equal and prefix == u'list://':
532                     return u'snapshot://' + path[len(prefix):]
534         return u'snapshot://' + path
# Strip any known scheme prefix (snapshot://, list://, delete://, …) from
# *path*. The fall-through return for already-bare paths is elided here.
536     def unprefixed(self, path):
538         remove a path prefix if any
540         for prefix in self.__path_prefix_list:
541             if path.startswith(prefix):
542                 return path[len(prefix):]
# Build a crypto handler for the given direction (CRYPTO_MODE_ENCRYPT /
# CRYPTO_MODE_DECRYPT), or nothing when neither key nor password is set
# (encryption disabled). Encrypt additionally embeds the nacl salt and the
# configured header/parameter versions; raises on an unknown mode value.
546     def initialize_encryption (self, mode):
547         password = self.password
548         key = self.crypto_key
551         if key is None and password is None:
553         if mode == CRYPTO_MODE_ENCRYPT:
554             return crypto.Encrypt (password=password,
557                                    version=self.crypto_version,
558                                    paramversion=self.crypto_paramversion)
559         if mode == CRYPTO_MODE_DECRYPT:
560             return crypto.Decrypt (password=password, key=key)
562         raise Exception ("invalid encryption mode [%r]" % mode)
# Open an index/info file with the backup's compression (from index_mode)
# and encryption settings. Each aux-file kind gets its own fixed AES-GCM IV
# counter so index and info streams never reuse IVs.
# NOTE(review): relies on tarfile._Stream, a private API of the project's
# bundled tarfile module — not the stdlib signature.
565     def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
567         Given the specified configuration, opens a file for reading or writing,
568         inheriting the encryption and compression settings from the backup.
569         Returns a file object ready to use.
571         :param mode:     IO mode (read or write, ``"r"`` and ``"w"``,
574         :param kind:     Role of the file, see AUXILIARY_FILE_* constants.
575                          Both the info and the auxiliary file have a globally
576                          unique, constant counter value.
579         if self.index_mode.startswith('gz'):
581         elif self.index_mode.startswith('bz2'):
# direction of the crypto context follows the IO mode (elided branch)
589             crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
591             crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
593         if crypto_ctx is not None:
594             if kind == AUXILIARY_FILE_INFO:
595                 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
596             elif kind == AUXILIARY_FILE_INDEX:
597                 enccounter = crypto.AES_GCM_IV_CNT_INDEX
599                 raise Exception ("invalid kind of aux file %r" % kind)
601         sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
602                                bufsize=tarfile.RECORDSIZE, fileobj=None,
603                                encryption=crypto_ctx, enccounter=enccounter)
# Full backup: validates arguments, opens the index sink and first tar
# volume, walks source_path, adds every entry to the tar and writes one JSON
# stat line per entry to the index, finishing with a CRC32 checksum record.
# NOTE(review): extra_data=dict() is a mutable default argument; also this
# method calls os.chdir(source_path), a process-wide side effect (restoration
# of the previous cwd is elided from this listing).
608     def create_full_backup(self, source_path, backup_path,
609                            max_volume_size=None, extra_data=dict()):
611         Creates a full backup.
614         - source_path: source path to the directory to back up.
615         - backup_path: path where the back up will be stored. Backup path will
616           be created if not existent.
617         - max_volume_size: maximum volume size in megabytes. Used to split the
618           backup in volumes. Optional (won't split in volumes by default).
619         - extra_data: a json-serializable dictionary with information that you
620           want to be included in the header of the index file
623         if not isinstance(source_path, str):
624             raise Exception('Source path must be a string')
626         if not isinstance(backup_path, str):
627             raise Exception('Backup path must be a string')
629         if not os.path.exists(source_path) or not os.path.isdir(source_path):
630             raise Exception('Source path "%s" does not exist or is not a '\
631                             'directory' % source_path)
633         if max_volume_size != None and (not isinstance(max_volume_size, int) or\
634                                         max_volume_size < 1):
635             raise Exception('max_volume_size must be a positive integer')
636         if max_volume_size != None:
# convert MiB to bytes for the tarfile layer
637             max_volume_size = max_volume_size*1024*1024
639         if not isinstance(extra_data, dict):
640             raise Exception('extra_data must be a dictionary')
643             extra_data_str = json.dumps(extra_data)
645             raise Exception('extra_data is not json-serializable')
647         if not os.access(source_path, os.R_OK):
648             raise Exception('Source path "%s" is not readable' % source_path)
650         # try to create backup path if needed
651         if not os.path.exists(backup_path):
652             os.makedirs(backup_path)
654         if not os.access(backup_path, os.W_OK):
655             raise Exception('Backup path "%s" is not writeable' % backup_path)
657         if source_path.endswith('/'):
658             source_path = source_path[:-1]
660         if backup_path.endswith('/'):
661             backup_path = backup_path[:-1]
663         # update current time
664         self.current_time = datetime.datetime.now()
666         if self.mode not in self.__file_extensions_dict:
667             raise Exception('Unrecognized extension')
669         # setup for encrypting payload
670         if self.encryptor is None:
671             self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
673         # some initialization
676         # generate the first volume name
677         vol_name = self.volume_name_func(backup_path, True, 0)
678         tarfile_path = os.path.join(backup_path, vol_name)
681         index_name = self.index_name_func(True)
682         index_path = os.path.join(backup_path, index_name)
683         index_sink = self.open_auxiliary_file(index_path, 'w')
687         def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
689             Handles the new volumes
691             volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
692             volume_path = os.path.join(backup_path, volume_name)
# record the volume that subsequent index entries belong to
693             deltarobj.vol_no = volume_number
695             # we convert relative paths into absolute because CWD is changed
696             if not os.path.isabs(volume_path):
697                 volume_path = os.path.join(cwd, volume_path)
699             if tarobj.fileobj is not None:
700                 tarobj.fileobj.close()
702             deltarobj.logger.debug("opening volume %s" % volume_path)
704             tarobj.open_volume(volume_path, encryption=encryption)
706         # wraps some args from context into the handler
707         new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
709         index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
711         s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
712         # calculate checksum and write into the stream
713         crc = binascii.crc32(s) & 0xFFFFffff
716         # start creating the tarfile
717         tarobj = tarfile.TarFile.open(tarfile_path,
718                               mode='w' + self.mode,
719                               format=tarfile.GNU_FORMAT,
720                               concat='#' in self.mode,
721                               encryption=self.encryptor,
722                               max_volume_size=max_volume_size,
723                               new_volume_handler=new_volume_handler,
724                               save_to_members=False,
726         os.chdir(source_path)
728         # for each file to be in the backup, do:
729         for path in self._recursive_walk_dir('.'):
730             # calculate stat dict for current file
731             statd = self._stat_dict(path)
732             statd['path'] = u'snapshot://' + statd['path']
733             statd['volume'] = self.vol_no
736             tarobj.add(path, arcname = statd['path'], recursive=False)
738             # retrieve file offset
739             statd['offset'] = tarobj.get_last_member_offset()
740             self.logger.debug("backup %s" % statd['path'])
742             # store the stat dict in the index
743             s = bytes(json.dumps(statd) + '\n', 'UTF-8')
744             crc = binascii.crc32(s, crc) & 0xffffffff
747         s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
748         crc = binascii.crc32(s, crc) & 0xffffffff
# final record: running CRC32 over all file-list lines, for later validation
750         s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
755         index_sink.close (close_fileobj=True)
# Differential backup: collates the previous backup's index against a fresh
# walk of source_path and, per entry, snapshots new/changed files, records
# deletions (delete:// entries backed by /dev/null members), and lists
# unchanged files in the index only (list:// entries, no tar payload).
# NOTE(review): extra_data=dict() is a mutable default argument; os.chdir()
# is a process-wide side effect; the branches assigning `action` between the
# collate cases are elided from this listing.
757     def create_diff_backup(self, source_path, backup_path, previous_index_path,
758                            max_volume_size=None, extra_data=dict()):
763         - source_path: source path to the directory to back up.
764         - backup_path: path where the back up will be stored. Backup path will
765           be created if not existent.
766         - previous_index_path: index of the previous backup, needed to know
767           which files changed since then.
768         - max_volume_size: maximum volume size in megabytes (MB). Used to split
769           the backup in volumes. Optional (won't split in volumes by default).
771         NOTE: previous index is assumed to follow exactly the same format as
772         the index_mode setup in the constructor.
774         # check/sanitize input
775         if not isinstance(source_path, str):
776             raise Exception('Source path must be a string')
778         if not isinstance(backup_path, str):
779             raise Exception('Backup path must be a string')
781         if not os.path.exists(source_path) or not os.path.isdir(source_path):
782             raise Exception('Source path "%s" does not exist or is not a '\
783                             'directory' % source_path)
785         if not isinstance(extra_data, dict):
786             raise Exception('extra_data must be a dictionary')
789             extra_data_str = json.dumps(extra_data)
791             raise Exception('extra_data is not json-serializable')
793         if not os.access(source_path, os.R_OK):
794             raise Exception('Source path "%s" is not readable' % source_path)
796         if max_volume_size != None and (not isinstance(max_volume_size, int) or\
797                                         max_volume_size < 1):
798             raise Exception('max_volume_size must be a positive integer')
799         if max_volume_size != None:
# convert MiB to bytes for the tarfile layer
800             max_volume_size = max_volume_size*1024*1024
802         if not isinstance(previous_index_path, str):
803             raise Exception('previous_index_path must be A string')
805         if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
806             raise Exception('Index path "%s" does not exist or is not a '\
807                             'file' % previous_index_path)
809         if not os.access(previous_index_path, os.R_OK):
810             raise Exception('Index path "%s" is not readable' % previous_index_path)
812         # try to create backup path if needed
813         if not os.path.exists(backup_path):
814             os.makedirs(backup_path)
816         if not os.access(backup_path, os.W_OK):
817             raise Exception('Backup path "%s" is not writeable' % backup_path)
819         if source_path.endswith('/'):
820             source_path = source_path[:-1]
822         if backup_path.endswith('/'):
823             backup_path = backup_path[:-1]
825         # update current time
826         self.current_time = datetime.datetime.now()
828         if self.mode not in self.__file_extensions_dict:
829             raise Exception('Unrecognized extension')
831         # setup for encrypting payload
832         if self.encryptor is None:
833             self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
835         # some initialization
838         # generate the first volume name
839         vol_name = self.volume_name_func(backup_path, is_full=False,
841         tarfile_path = os.path.join(backup_path, vol_name)
846         index_name = self.index_name_func(is_full=False)
847         index_path = os.path.join(backup_path, index_name)
848         index_sink = self.open_auxiliary_file(index_path, 'w')
850         def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
852             Handles the new volumes
854             volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
855                                                      volume_number=volume_number)
856             volume_path = os.path.join(backup_path, volume_name)
# record the volume that subsequent index entries belong to
857             deltarobj.vol_no = volume_number
859             # we convert relative paths into absolute because CWD is changed
860             if not os.path.isabs(volume_path):
861                 volume_path = os.path.join(cwd, volume_path)
863             deltarobj.logger.debug("opening volume %s" % volume_path)
864             tarobj.open_volume(volume_path)
866         # wraps some args from context into the handler
867         new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
869         index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
871         s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
872         # calculate checksum and write into the stream
873         crc = binascii.crc32(s) & 0xFFFFffff
876         # start creating the tarfile
877         tarobj = tarfile.TarFile.open(tarfile_path,
878                               mode='w' + self.mode,
879                               format=tarfile.GNU_FORMAT,
880                               concat='#' in self.mode,
881                               encryption=self.encryptor,
882                               max_volume_size=max_volume_size,
883                               new_volume_handler=new_volume_handler,
884                               save_to_members=False,
888         # create the iterators, first the previous index iterator, then the
889         # source path directory iterator and collate and iterate them
890         if not os.path.isabs(previous_index_path):
891             previous_index_path = os.path.join(cwd, previous_index_path)
892         index_it = self.iterate_index_path(previous_index_path)
894         os.chdir(source_path)
895         dir_it = self._recursive_walk_dir('.')
896         dir_path_it = self.jsonize_path_iterator(dir_it)
904         # for each file to be in the backup, do:
905         for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
907             # if file is not in the index, it means it's a new file, so we have
912             # if the file is not in the directory iterator, it means that it has
913             # been deleted, so we need to mark it as such
916             # if the file is in both iterators, it means it might have either
917             # not changed (in which case we will just list it in our index but
918             # it will not be included in the tar file), or it might have
919             # changed, in which case we will snapshot it.
920             elif ipath and dpath:
921                 if self._equal_stat_dicts(ipath, dpath):
925             # TODO: when creating chained backups (i.e. diffing from another
926             # diff), we will need to detect the type of action in the previous
927             # index, because if it was delete and dpath is None, we should
930             if action == 'snapshot':
931                 # calculate stat dict for current file
933                 stat['path'] = "snapshot://" + dpath['path']
934                 stat['volume'] = self.vol_no
936                 self.logger.debug("[STORE] %s" % dpath['path'])
939                     tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
941                     # retrieve file offset
942                     stat['offset'] = tarobj.get_last_member_offset()
943             elif action == 'delete':
944                 path = self.unprefixed(ipath['path'])
946                     u'path': u'delete://' + path,
947                     u'type': ipath['type']
949                 self.logger.debug("[DELETE] %s" % path)
951                 # mark it as deleted in the backup
952                 tarobj.add("/dev/null", arcname=stat['path'])
953             elif action == 'list':
955                 path = self.unprefixed(ipath['path'])
956                 stat['path'] = u'list://' + path
957                 # unchanged files do not enter in the backup, only in the index
958                 self.logger.debug("[UNCHANGED] %s" % path)
961                 self.logger.warning('unknown action in create_diff_backup: {0}'
966             # store the stat dict in the index
967             s = bytes(json.dumps(stat) + '\n', 'UTF-8')
968             crc = binascii.crc32(s, crc) & 0xffffffff
971         s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
972         crc = binascii.crc32(s, crc) & 0xffffffff
# final record: running CRC32 over all file-list lines, for later validation
974         s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
# Factory returning a context-manager/iterator over a JSON-lines index file.
# __enter__ validates the header record and skips forward to BEGIN-FILE-LIST;
# iteration yields parsed entries (and presumably their line numbers — the
# return statements are elided) until END-FILE-LIST.
983     def iterate_index_path(self, index_path):
985         Returns an index iterator. Internally, it uses a classic iterator class.
986         We do that instead of just yielding so that the iterator object can have
987         an additional function to close the file descriptor that is opened in
991         class IndexPathIterator(object):
992             def __init__(self, delta_tar, index_path):
993                 self.delta_tar = delta_tar
994                 self.index_path = index_path
996                 self.extra_data = dict()
1006             def __enter__(self):
1008                 Allows this iterator to be used with the "with" statement
1011                 self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
1012                 # check index header
1013                 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1014                 if j.get("type", '') != 'python-delta-tar-index' or\
1015                    j.get('version', -1) != 1:
1016                     raise Exception("invalid index file format: %s" % json.dumps(j))
1018                 self.extra_data = j.get('extra_data', dict())
1020                 # find BEGIN-FILE-LIST, ignore other headers
1022                     j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1023                     if j.get('type', '') == 'BEGIN-FILE-LIST':
1027             def __exit__(self, type, value, tb):
1029                 Allows this iterator to be used with the "with" statement
1036                 # read each file in the index and process it to do the restore
1040                     j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1041                 except Exception as e:
1046                 op_type = j.get('type', '')
1048                 # when we detect the end of the list, break the loop
1049                 if op_type == 'END-FILE-LIST':
# skip records of types the restore logic does not understand
1055                 if op_type not in ['directory', 'file', 'link']:
1056                     self.delta_tar.logger.warning('unrecognized type to be '
1057                                         'restored: %s, line %d' % (op_type, l_no))
1059                     return self.__next__()
1063         return IndexPathIterator(self, index_path)
# Factory returning a context-manager/iterator over a backup tar. Each
# iteration yields the next member converted to the same stat-dict shape as
# the index entries (with a "member" field per the docstring); consecutive
# multivolume fragments of one file are collapsed via the path comparison.
1065     def iterate_tar_path(self, tar_path, new_volume_handler=None):
1067         Returns a tar iterator that iterates jsonized member items that contain
1068         an additional "member" field, used by RestoreHelper.
1070         class TarPathIterator(object):
1071             def __init__(self, delta_tar, tar_path, new_volume_handler=None):
1072                 self.delta_tar = delta_tar
1073                 self.tar_path = tar_path
1075                 self.last_member = None
1076                 self.new_volume_handler = new_volume_handler
1084                     self.tar_obj.close()
1086             def __enter__(self):
1088                 Allows this iterator to be used with the "with" statement
1090                 if self.tar_obj is None:
# build a decryptor only when the backup is password-protected
1092                     if self.delta_tar.password is not None:
1093                         decryptor = crypto.Decrypt \
1094                                         (password=self.delta_tar.password,
1095                                          key=self.delta_tar.crypto_key)
1096                     self.tar_obj = tarfile.TarFile.open(self.tar_path,
1097                                         mode='r' + self.delta_tar.mode,
1098                                         format=tarfile.GNU_FORMAT,
1099                                         concat='#' in self.delta_tar.mode,
1100                                         encryption=decryptor,
1101                                         new_volume_handler=self.new_volume_handler,
1102                                         save_to_members=False,
1106             def __exit__(self, type, value, tb):
1108                 Allows this iterator to be used with the "with" statement
1111                 self.tar_obj.close()
1116                 Read each member and return it as a stat dict
1118                     tarinfo = self.tar_obj.__iter__().__next__()
1119                     # NOTE: here we compare if tarinfo.path is the same as before
1120                     # instead of comparing the tarinfo object itself because the
1121                     # object itself might change for multivol tarinfos
1122                     if tarinfo is None or (self.last_member is not None and\
1123                         self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
1126                 self.last_member = tarinfo
1129                 if tarinfo.isfile():
1131                 elif tarinfo.isdir():
1133                 elif tarinfo.islnk() or tarinfo.issym():
1138                     u'path': tarinfo.path,
1139                     u'mode': tarinfo.mode,
1140                     u'mtime': tarinfo.mtime,
1141                     u'ctime': -1, # cannot restore
1142                     u'uid': tarinfo.uid,
1143                     u'gid': tarinfo.gid,
1144                     u'inode': -1, # cannot restore
1145                     u'size': tarinfo.size,
1149         return TarPathIterator(self, tar_path, new_volume_handler)
# Generator adapter: turns an iterator of relative paths into (stat_dict, 0)
# tuples, optionally stripping *strip* leading path components from the
# stored path; terminates when the wrapped iterator is exhausted.
# NOTE(review): parameter `iter` shadows the builtin of the same name.
1151     def jsonize_path_iterator(self, iter, strip=0):
1153         converts the yielded items of an iterator into json path lines.
1155         strip: Strip the smallest prefix containing num leading slashes from
1160                 path = iter.__next__()
1162                     yield self._stat_dict(path), 0
1164                     st = self._stat_dict(path)
1165                     st['path'] = "/".join(path.split("/")[strip:])
1167             except StopIteration:
# Factory returning a context-manager/iterator over an in-memory index (a
# plain iterable of entries) so disaster-recovery code can reuse the same
# with/iterate protocol as the file- and tar-backed iterators above.
1170     def iterate_disaster_index (self, index):
1172         Mimick the behavior of the other object iterators, just with the inputs
1173         supplied directly as *index*.
1176         class RawIndexIterator(object):
1177             def __init__(self, delta_tar, index):
1178                 self.delta_tar = delta_tar
1188             def __enter__(self):
1190                 Allows this iterator to be used with the "with" statement
1192                 self.iter = self.index.__iter__ ()
1195             def __exit__(self, type, value, tb):
1197                 Allows this iterator to be used with the "with" statement
1201                 idxent = self.iter.__next__ ()
1204         return RawIndexIterator(self, index)
# NOTE(review): partial numbered listing — the driving while-loop and the
# branch conditions between the yields are elided; comments below describe
# only what the visible lines establish.
1206 def collate_iterators(self, it1, it2):
1208 Collate two iterators, so that it returns pairs of the items of each
1209 iterator (if the items are the same), or (None, elem2) or (elem1, None)
1210 when there's no match for the items in the other iterator.
1212 It assumes that the items in both lists are ordered in the same way.
1215 elem1, elem2 = None, None
# it1 exhausted: drain it2, yielding (None, elem2, l_no) triples.
1219 elem1, l_no = it1.__next__()
1220 except StopIteration:
1222 yield (None, elem2, l_no)
1224 if isinstance(elem2, tuple):
1226 yield (None, elem2, l_no)
# it2 exhausted: drain the rest of it1, yielding (elem1, None, l_no).
1230 elem2 = it2.__next__()
1231 if isinstance(elem2, tuple):
1233 except StopIteration:
1235 yield (elem1, None, l_no)
1236 for elem1, l_no in it1:
1237 yield (elem1, None, l_no)
# Both sides have an element: compare their unprefixed paths to decide
# which side(s) to emit this round.
1240 index1 = self.unprefixed(elem1['path'])
1241 index2 = self.unprefixed(elem2['path'])
1242 i1, i2 = self.compare_indexes(index1, index2)
1244 yield1 = yield2 = None
1251 yield (yield1, yield2, l_no)
# NOTE(review): partial numbered listing — the `if`/`elif` condition lines
# between the returns are elided, so the exact comparisons cannot be stated
# here; the docstring below defines the intended ordering contract.
1253 def compare_indexes(self, index1, index2):
1255 Compare iterator indexes and return a tuple in the following form:
1256 if index1 < index2, returns (index1, None)
1257 if index1 == index2 returns (index1, index2)
1258 else: returns (None, index2)
# Paths are compared component-wise after splitting on '/'; `length` is
# the difference in component counts, apparently used as a first-order
# ordering criterion (conditions elided — TODO confirm against full file).
1260 l1 = index1.split('/')
1261 l2 = index2.split('/')
1262 length = len(l2) - len(l1)
1265 return (index1, None)
1267 return (None, index2)
# Same depth: lexicographic component-by-component comparison.
1269 for i1, i2 in zip(l1, l2):
1271 return (index1, None)
1273 return (None, index2)
# All components equal: the indexes match.
1275 return (index1, index2)
# NOTE(review): partial numbered listing — only comments added.
# List the contents of a backup tar: validate the path, set up decryption
# and a multivolume handler, open the archive, then extract with a filter
# that either logs each member or delegates to `list_func`.
1277 def list_backup(self, backup_tar_path, list_func=None):
1278 if not isinstance(backup_tar_path, str):
1279 raise Exception('Backup tar path must be a string')
1281 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1282 raise Exception('Source path "%s" does not exist or is not a '\
1283 'file' % backup_tar_path)
1285 if not os.access(backup_tar_path, os.R_OK):
1286 raise Exception('Source path "%s" is not readable' % backup_tar_path)
# Local handler: compute the next volume's name/path and switch the tar
# object to it when a member spans volumes.
1290 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
1292 Handles the new volumes
1294 volume_name = deltarobj.volume_name_func(backup_path, True,
1295 volume_number, guess_name=True)
1296 volume_path = os.path.join(backup_path, volume_name)
1298 # we convert relative paths into absolute because CWD is changed
1299 if not os.path.isabs(volume_path):
1300 volume_path = os.path.join(cwd, volume_path)
1301 tarobj.open_volume(volume_path, encryption=encryption)
# Lazily create the decryptor the first time a listing/restore needs it.
1303 if self.decryptor is None:
1304 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
1306 backup_path = os.path.dirname(backup_tar_path)
1307 if not os.path.isabs(backup_path):
1308 backup_path = os.path.join(cwd, backup_path)
1309 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)
1311 tarobj = tarfile.TarFile.open(backup_tar_path,
1312 mode='r' + self.mode,
1313 format=tarfile.GNU_FORMAT,
1314 concat='#' in self.mode,
1315 encryption=self.decryptor,
1316 new_volume_handler=new_volume_handler,
1317 save_to_members=False,
# Extraction filter (note: local name shadows the builtin `filter`):
# with no list_func, log each member path at INFO level.
1320 def filter(cls, list_func, tarinfo):
1321 if list_func is None:
1322 self.logger.info(tarinfo.path)
1326 filter = partial(filter, self, list_func)
1328 tarobj.extractall(filter=filter)
# NOTE(review): partial numbered listing — many lines (try/else scaffolding,
# mode selection, continue statements) are elided; only comments are added.
# Main restore entry point. Supports three modes (visible below as the
# "tar" / "diff" / "disaster" branches): direct tar restore, index-driven
# diff restore, and disaster-recovery from a raw backup_index list.
# Returns a list of (path, exception) pairs for files that could not be
# restored when tolerance is not strict.
1331 def restore_backup(self, target_path, backup_indexes_paths=[],
1332 backup_tar_path=None, restore_callback=None,
1333 disaster=tarfile.TOLERANCE_STRICT, backup_index=None):
1338 - target_path: path to restore.
1339 - backup_indexes_paths: path to backup indexes, in descending date order.
1340 The indexes indicate the location of their respective backup volumes,
1341 and multiple indexes are needed to be able to restore diff backups.
1342 Note that this is an optional parameter: if not suplied, it will
1343 try to restore directly from backup_tar_path.
1344 - backup_tar_path: path to the backup tar file. Used as an alternative
1345 to backup_indexes_paths to restore directly from a tar file without
1346 using any file index. If it's a multivol tarfile, volume_name_func
1348 - restore_callback: callback function to be called during restore.
1349 This is passed to the helper and gets called for every file.
1351 NOTE: If you want to use an index to restore a backup, this function
1352 only supports to do so when the tarfile mode is either uncompressed or
1353 uses concat compress mode, because otherwise it would be very slow.
1355 NOTE: Indices are assumed to follow the same format as the index_mode
1356 specified in the constructor.
1358 Returns the list of files that could not be restored, if there were
# --- argument validation ---------------------------------------------
1361 # check/sanitize input
1362 if not isinstance(target_path, str):
1363 raise Exception('Target path must be a string')
1365 if backup_indexes_paths is None and backup_tar_path == []:
1366 raise Exception("You have to either provide index paths or a tar path")
# Mode selection (assignments elided): a list backup_index means
# disaster mode; an empty index list means direct tar mode.
1368 if isinstance (backup_index, list) is True:
1370 elif len(backup_indexes_paths) == 0:
1376 if not isinstance(backup_tar_path, str):
1377 raise Exception('Backup tar path must be a string')
1379 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1380 raise Exception('Source path "%s" does not exist or is not a '\
1381 'file' % backup_tar_path)
1383 if not os.access(backup_tar_path, os.R_OK):
1384 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1386 if not isinstance(backup_indexes_paths, list):
1387 raise Exception('backup_indexes_paths must be a list')
1389 if self.mode.startswith(':') or self.mode.startswith('|'):
1390 raise Exception('Restore only supports either uncompressed tars'
1391 ' or concat compression when restoring from an index, and '
1392 ' the open mode you provided is "%s"' % self.mode)
1394 for index in backup_indexes_paths:
1395 if not isinstance(index, str):
1396 raise Exception('indices must be strings')
1398 if not os.path.exists(index) or not os.path.isfile(index):
1399 raise Exception('Index path "%s" does not exist or is not a '\
1402 if not os.access(index, os.R_OK):
1403 raise Exception('Index path "%s" is not readable' % index)
# --- environment setup: create target dir, absolutize paths, chdir ---
1405 # try to create backup path if needed
1406 if not os.path.exists(target_path):
1407 os.makedirs(target_path)
1409 # make backup_tar_path absolute so that iterate_tar_path works fine
1410 if backup_tar_path and not os.path.isabs(backup_tar_path):
1411 backup_tar_path = os.path.abspath(backup_tar_path)
1414 os.chdir(target_path)
1416 # setup for decrypting payload
1417 if self.decryptor is None:
1418 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
# --- build the index iterator + RestoreHelper per mode ---------------
1421 index_it = self.iterate_tar_path(backup_tar_path)
1422 helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
1423 tarobj=index_it.tar_obj)
1424 elif mode == "diff":
1425 helper = RestoreHelper(self, cwd, backup_indexes_paths,
1428 # get iterator from newest index at _data[0]
1429 index1 = helper._data[0]["path"]
1430 index_it = self.iterate_index_path(index1)
1431 except tarfile.DecryptionError as exn:
1432 self.logger.error("failed to decrypt file [%s]: %s; is this an "
1433 "actual encrypted index file?"
1434 % (index1, str (exn)))
1435 return [(index1, exn)]
1436 except Exception as exn:
1438 self.logger.error("failed to read file [%s]: %s; is this an "
1439 "actual index file?" % (index1, str (exn)))
1440 return [(index1, exn)]
1441 elif mode == "disaster":
1442 index_it = self.iterate_disaster_index (backup_index)
1443 helper = RestoreHelper (self, cwd, backup_path=backup_tar_path,
1444 backup_index=backup_index,
# Walk the current target directory so index entries can be collated
# against what is already on disk.
1448 dir_it = self._recursive_walk_dir('.')
1449 dir_path_it = self.jsonize_path_iterator(dir_it)
1451 failed = [] # irrecoverable files
# --- main collation loop: ipath from index, dpath from disk ----------
1453 # for each file to be restored, do:
1454 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
1456 upath = dpath['path']
1457 op_type = dpath['type']
1459 upath = self.unprefixed(ipath['path'])
1460 op_type = ipath['type']
# Skip entries excluded by the configured include/exclude filters.
1463 if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
1466 # if types of the file mismatch, the file needs to be deleted
1468 if ipath is not None and dpath is not None and\
1469 dpath['type'] != ipath['type']:
1470 helper.delete(upath)
1472 # if file not found in dpath, we can directly restore from index
1474 # if the file doesn't exist and it needs to be deleted, it
1475 # means that work is already done
1476 if ipath['path'].startswith('delete://'):
1479 self.logger.debug("restore %s" % ipath['path'])
1480 helper.restore(ipath, l_no, restore_callback)
1481 except Exception as e:
1482 iipath = ipath.get ("path", "")
1483 self.logger.error("FAILED to restore: {} ({})"
# Non-strict tolerance collects failures instead of aborting.
1485 if disaster != tarfile.TOLERANCE_STRICT:
1486 failed.append ((iipath, e))
1489 # if both files are equal, we have nothing to restore
1490 if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
1493 # we have to restore the file, but first we need to delete the
1494 # current existing file.
1495 # we don't delete the file if it's a directory, because it might
1496 # just have changed mtime, so it's quite inefficient to remove
1499 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
1500 helper.delete(upath)
1501 self.logger.debug("restore %s" % ipath['path'])
1503 helper.restore(ipath, l_no, restore_callback)
1504 except Exception as e:
1505 if disaster == tarfile.TOLERANCE_STRICT:
1507 failed.append ((ipath.get ("path", ""), e))
1510 # if the file is not in the index (so it comes from the target
1511 # directory) then we have to delete it
1513 self.logger.debug("delete %s" % upath)
1514 helper.delete(upath)
# Directory metadata is applied last, mirroring tarfile.extractall.
1516 helper.restore_directories_permissions()
# Convenience wrapper around restore_backup with TOLERANCE_RECOVER:
# extraction continues past bad files, which are returned with a reason.
# NOTE(review): `restore_callback` is accepted but not forwarded here —
# possibly intentional, TODO confirm against the full file.
1524 def recover_backup(self, target_path, backup_indexes_paths=[],
1525 restore_callback=None):
1527 Walk the index, extracting objects in disaster mode. Bad files are
1528 reported along with a reason.
1530 return self.restore_backup(target_path,
1531 backup_indexes_paths=backup_indexes_paths,
1532 disaster=tarfile.TOLERANCE_RECOVER)
# NOTE(review): partial numbered listing — some argument lines elided.
# Most aggressive recovery mode: ignore the (possibly corrupt) index and
# scan the volumes themselves for header-like structures, then restore
# with TOLERANCE_RESCUE from the synthesized index.
1535 def rescue_backup(self, target_path, backup_tar_path,
1536 restore_callback=None):
1538 More aggressive “unfsck” mode: do not rely on the index data as the
1539 files may be corrupt; skim files for header-like information and
1540 attempt to retrieve the data.
# Helper handed to the scanner so it can name each volume file.
1542 def gen_volume_name (nvol):
1543 return os.path.join (os.path.dirname (backup_tar_path),
1544 self.volume_name_func (backup_tar_path,
# Build a disaster index directly from the on-disk volumes.
1548 backup_index = tarfile.gen_rescue_index (gen_volume_name,
1550 password=self.password,
1551 key=self.crypto_key)
1553 return self.restore_backup(target_path,
1554 backup_index=backup_index,
1555 backup_tar_path=backup_tar_path,
1556 disaster=tarfile.TOLERANCE_RESCUE)
# NOTE(review): partial numbered listing — the readline and raise
# scaffolding lines are elided.
# Read one line from *f* and decode it as a UTF-8 JSON index entry.
# On decode failure, distinguish "looks like a gzip stream" (GZ_MAGIC
# prefix) from "generic binary garbage" in the error message; on JSON
# failure report the offending line number and content.
1559 def _parse_json_line(self, f, l_no):
1561 Read line from file like object and process it as JSON.
1566 j = json.loads(l.decode('UTF-8'))
1567 except UnicodeDecodeError as e:
1568 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1570 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1571 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1574 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1575 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1577 except ValueError as e:
1578 raise Exception("error parsing this json line "
1579 "(line number %d): %s" % (l_no, l))
# NOTE(review): partial numbered listing — class-attribute assignments for
# the data/directories containers are elided; only comments added.
1583 class RestoreHelper(object):
1585 Class used to help to restore files from indices
1588 # holds the dicts of data
1595 # list of directories to be restored. This is done as a last step, see
1596 # tarfile.extractall for details.
# Default failure tolerance; overridden per-instance in __init__.
1599 _disaster = tarfile.TOLERANCE_STRICT
# NOTE(review): partial numbered listing — dict-literal fields and several
# control lines are elided; comments only, code untouched.
# Builds self._data: one state dict per backup source (volume fd, tarobj,
# iterator, volume-switch handler, decryptor), for the three construction
# paths: raw backup_index list, index_list, or direct (tarobj, backup_path).
1601 def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
1602 backup_index=None, tarobj=None,
1603 disaster=tarfile.TOLERANCE_STRICT):
1605 Constructor opens the tars and init the data structures.
1609 - Index list must be provided in reverse order (newer first).
1610 - “newer first” apparently means that if there are n backups
1611 provided, the last full backup is at index n-1 and the most recent
1612 diff backup is at index 0.
1613 - Only the first, the second, and the last elements of
1614 ``index_list`` are relevant, others will not be accessed.
1615 - If no ``index_list`` is provided, both ``tarobj`` and
1616 ``backup_path`` must be passed.
1617 - If ``index_list`` is provided, the values of ``tarobj`` and
1618 ``backup_path`` are ignored.
1621 self._directories = []
1622 self._deltatar = deltatar
1624 self._password = deltatar.password
1625 self._crypto_key = deltatar.crypto_key
1626 self._decryptors = []
1627 self._disaster = disaster
# Only root can chown restored entries; remember that up front.
1634 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1635 self.canchown = True
1637 self.canchown = False
# Path 1: disaster mode — a raw index list was supplied directly.
1639 if isinstance (backup_index, list) is True:
1640 decryptor = self._deltatar.decryptor
1642 [{ "curr_vol_no" : None
1646 , "path" : backup_path
1649 , "last_itelement" : None
1651 , "new_volume_handler" :
1652 partial(self.new_volume_handler,
1653 self._deltatar, self._cwd, True,
1654 os.path.dirname(backup_path), decryptor)
1655 , "decryptor" : decryptor
# Path 2: diff restore — one state dict per index file; the last index
# in the list is the full backup.
1657 elif index_list is not None:
1658 for index in index_list:
1659 is_full = index == index_list[-1]
# Each index gets its own Decrypt instance when encryption is on.
1662 if self._password is not None:
1663 decryptor = crypto.Decrypt (password=self._password,
1664 key=self._crypto_key)
1666 # make paths absolute to avoid cwd problems
1667 if not os.path.isabs(index):
1668 index = os.path.normpath(os.path.join(cwd, index))
1678 last_itelement = None,
1680 new_volume_handler = partial(self.new_volume_handler,
1681 self._deltatar, self._cwd, is_full,
1682 os.path.dirname(index), decryptor),
1683 decryptor = decryptor
1685 self._data.append(s)
# Path 3: direct tar restore — reuse the caller's tarobj, but rebind its
# volume handler to this helper's state.
1687 # make paths absolute to avoid cwd problems
1688 if not os.path.isabs(backup_path):
1689 backup_path = os.path.normpath(os.path.join(cwd, backup_path))
1691 # update the new_volume_handler of tar_obj
1692 tarobj.new_volume_handler = partial(self.new_volume_handler,
1693 self._deltatar, self._cwd, True, os.path.dirname(backup_path),
1694 self._deltatar.decryptor)
1703 last_itelement = None,
1705 new_volume_handler = tarobj.new_volume_handler,
1706 decryptor = self._deltatar.decryptor
1708 self._data.append(s)
# Cleanup method (its `def` line is elided in this listing): close every
# open volume file descriptor and tar object held in self._data and null
# the references so they are not closed twice.
1713 Closes all open files
1715 for data in self._data:
1717 data['vol_fd'].close()
1718 data['vol_fd'] = None
1720 data['tarobj'].close()
1721 data['tarobj'] = None
# NOTE(review): partial numbered listing — the docstring and the actual
# removal branch bodies (directory vs. file/link) are elided.
# Removes *path* from the target tree while preserving the parent
# directory's mtime across the deletion.
1723 def delete(self, path):
# Nothing to do if the path is already gone (early exit elided).
1727 if not os.path.exists(path):
1730 # to preserve parent directory mtime, we save it
1731 parent_dir = os.path.dirname(path) or os.getcwd()
1732 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
# Real directories and files/symlinks are removed differently; the
# removal calls themselves are elided in this listing.
1734 if os.path.isdir(path) and not os.path.islink(path):
1739 # now we restore parent_directory mtime
1740 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
# NOTE(review): partial numbered listing — callback invocation, returns and
# some else-branches are elided; comments only.
# Restore one index entry: handle delete:// (already done), snapshot:// in
# the newest index directly, otherwise fall back to the full-backup index
# (_data[1]) for list:// entries.
1742 def restore(self, itpath, l_no, callback=None):
1744 Restore the path from the appropriate backup. Receives the current path
1745 from the newest (=first) index iterator. itpath must be not null.
1746 callback is a custom function that gets called for every file.
1748 NB: This function takes the attribute ``_data`` as input but will only
1749 ever use its first and, if available, second element. Anything else in
1750 ``._data[]`` will be ignored.
1752 path = itpath['path']
1754 # Calls the callback function
1758 if path.startswith('delete://'):
1759 # the file has previously been deleted already in restore_backup in
1760 # all cases so we just need to finish
1763 # get data from newest index (_data[0])
1764 data = self._data[0]
1765 upath = self._deltatar.unprefixed(path)
1767 # to preserve parent directory mtime, we save it
1768 parent_dir = os.path.dirname(upath) or os.getcwd()
1769 if not os.path.exists(parent_dir):
1770 os.makedirs(parent_dir)
1771 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1773 # if path is found in the newest index as to be snapshotted, deal with it
1775 if path.startswith('snapshot://'):
1776 self.restore_file(itpath, data, path, l_no, upath)
1778 # now we restore parent_directory mtime
1779 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1782 # we go from index to index, finding the path in the index, then finding
1783 # the index with the most recent snapshot of the file being restored
1785 # Right now we support diff backups, only. No incremental backups.
1786 # As a result _data[0] is always the diff backup index
1787 # and _data[1] the full backup index.
1788 if len(self._data) == 2:
1789 data = self._data[1]
1790 d, l_no, dpath = self.find_path_in_index(data, upath)
# d is falsy: the path is missing from the full index too.
1792 self._deltatar.logger.warning('Error restoring file %s from '
1793 'index, not found in index %s' % (path, data['path']))
1796 cur_path = d.get('path', '')
1797 if cur_path.startswith('delete://'):
1798 self._deltatar.logger.warning(('Strange thing happened, file '
1799 '%s was listed in first index but deleted by another '
1800 'one. Path was ignored and untouched.') % path)
1802 elif cur_path.startswith('snapshot://'):
1803 # this code path is reached when the file is unchanged
1804 # in the newest index and therefore of type 'list://'
1805 self.restore_file(d, data, path, l_no, dpath)
1807 # now we restore parent_directory mtime
1808 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1811 # error code path is reached when:
1812 # a) we have more than two indexes (unsupported atm)
1813 # b) both indexes contain a list:// entry (logic error)
1814 # c) we have just one index and it also contains list://
1815 self._deltatar.logger.warning(('Error restoring file %s from index, '
1816 'snapshot not found in any index') % path)
# NOTE(review): partial numbered listing — the loop header, restart
# condition and several return lines are elided.
# Scan an index's iterator for *upath*, resuming from the last-seen
# element when possible and recording last_itelement/last_lno so the next
# lookup on this index can pick up where this one stopped.
1818 def find_path_in_index(self, data, upath):
1819 # NOTE: we restart the iterator sometimes because the iterator can be
1820 # walked over completely multiple times, for example if one path if not
1821 # found in one index and we have to go to the next index.
1822 it = data['iterator']
# No live iterator: (re)open the index file for iteration.
1824 it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
1825 d, l_no = it.__next__()
# Resume from the element cached by a previous call.
1827 d = data['last_itelement']
1828 l_no = data['last_lno']
1831 dpath = self._deltatar.unprefixed(d.get('path', ''))
# Exact hit: cache position and return the entry.
1833 data['last_itelement'] = d
1834 data['last_lno'] = l_no
1835 return d, l_no, dpath
1837 up, dp = self._deltatar.compare_indexes(upath, dpath)
1838 # any time upath should have appeared before current dpath, it means
1839 # upath is just not in this index and we should stop
1841 data['last_itelement'] = d
1842 data['last_lno'] = l_no
# Advance; a fully-consumed index also caches its final position.
1846 d, l_no = it.__next__()
1847 except StopIteration:
1848 data['last_itelement'] = d
1849 data['last_lno'] = l_no
# NOTE(review): partial numbered listing — try scaffolding and the
# chown-eligibility condition are elided.
# Final pass: apply mode/mtime (and, when permitted, ownership) to the
# directories queued by add_member_dir, deepest-first so parent mtimes are
# not clobbered by child updates.
1852 def restore_directories_permissions(self):
1854 Restore directory permissions when everything have been restored
# Sort by name then reverse => children before their parents.
1861 self._directories.sort(key=operator.attrgetter('name'))
1862 self._directories.reverse()
1864 # Set correct owner, mtime and filemode on directories.
1865 for member in self._directories:
1866 dirpath = member.name
1868 os.chmod(dirpath, member.mode)
1869 os.utime(dirpath, (member.mtime, member.mtime))
1871 # We have to be root to do so.
# Resolve names to ids; DirItem carries gname/uname from the archive.
1873 g = grp.getgrnam(member.gname)[2]
1877 u = pwd.getpwnam(member.uname)[2]
# member.issym here is the boolean stored by add_member_dir (it calls
# tarinfo.issym() and stores the result), not a method.
1881 if member.issym and hasattr(os, "lchown"):
1882 os.lchown(dirpath, u, g)
1884 os.chown(dirpath, u, g)
1885 except EnvironmentError:
1886 raise tarfile.ExtractError("could not change owner")
1888 except tarfile.ExtractError as e:
1889 self._deltatar.logger.warning('tarfile: %s' % e)
# Volume-switch callback (bound via functools.partial in __init__ with the
# first five arguments pre-filled): derive the next volume's filename via
# the DeltaTar naming function and reopen the tar object on it.
1892 def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
1894 Handles the new volumes
1896 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1897 volume_number, guess_name=True)
1898 volume_path = os.path.join(backup_path, volume_name)
1900 # we convert relative paths into absolute because CWD is changed
1901 if not os.path.isabs(volume_path):
1902 volume_path = os.path.join(cwd, volume_path)
1903 tarobj.open_volume(volume_path, encryption=encryption)
# NOTE(review): partial numbered listing — docstring quotes, some returns
# and try/except scaffolding are elided.
# Extract one snapshot from a specific volume: position the right volume
# fd, seek to the member's recorded offset, open a throwaway tar object
# there, and extract the member under its unprefixed path.
1905 def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
1907 Restores a snapshot of a file from a specific backup
1909 op_type = file_data.get('type', -1)
1910 member = file_data.get('member', None)
1911 ismember = bool(member)
1913 # when member is set, then we can assume everything is right and we
1914 # just have to restore the path
1916 vol_no = file_data.get('volume', -1)
# Reject index entries with a bogus volume number.
1918 if not isinstance(vol_no, int) or vol_no < 0:
1919 self._deltatar.logger.warning('unrecognized type to be restored: '
1920 '%s, line %d' % (op_type, l_no))
1922 # setup the volume that needs to be read. only needed when member is
# Switch volumes: close the previous fd/tarobj, open the new volume file.
1924 if index_data['curr_vol_no'] != vol_no:
1925 index_data['curr_vol_no'] = vol_no
1926 backup_path = os.path.dirname(index_data['path'])
1927 vol_name = self._deltatar.volume_name_func(backup_path,
1928 index_data['is_full'], vol_no, guess_name=True)
1929 vol_path = os.path.join(backup_path, vol_name)
1930 if index_data['vol_fd']:
1931 index_data['vol_fd'].close()
1932 index_data['vol_fd'] = open(vol_path, 'rb')
1934 # force reopen of the tarobj because of new volume
1935 if index_data['tarobj']:
1936 index_data['tarobj'].close()
1937 index_data['tarobj'] = None
1939 # seek tarfile if needed
1940 offset = file_data.get('offset', -1)
1941 if index_data['tarobj']:
# In rescue mode never trust the current stream position.
1942 if self._disaster == tarfile.TOLERANCE_RESCUE:
1943 # force a seek and reopen
1944 index_data['tarobj'].close()
1945 index_data['tarobj'] = None
# Otherwise peek at the next member; if it is not the one we want,
# drop the tarobj and fall through to the seek-and-reopen path.
1948 member = index_data['tarobj'].__iter__().__next__()
1949 except tarfile.DecryptionError:
1951 except tarfile.CompressionError:
1954 if not member or member.path != file_data['path']:
1955 # force a seek and reopen
1956 index_data['tarobj'].close()
1957 index_data['tarobj'] = None
1960 # open the tarfile if needed
1961 if not index_data['tarobj']:
1962 index_data['vol_fd'].seek(offset)
1963 index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
1964 fileobj=index_data['vol_fd'],
1965 format=tarfile.GNU_FORMAT,
1966 concat='#' in self._deltatar.mode,
1967 encryption=index_data["decryptor"],
1968 new_volume_handler=index_data['new_volume_handler'],
1969 save_to_members=False,
1970 tolerance=self._disaster)
1972 member = index_data['tarobj'].__iter__().__next__()
# Restore under the unprefixed target path, not the index-prefixed one.
1974 member.path = unprefixed_path
1975 member.name = unprefixed_path
1977 if op_type == 'directory':
1978 self.add_member_dir(member)
# Extract dirs with owner-only perms first; final perms are applied
# later by restore_directories_permissions (copy avoids mutating the
# queued member).
1979 member = copy.copy(member)
1980 member.mode = 0o0700
1982 # if it's an existing directory, we then don't need to recreate it
1983 # just set the right permissions, mtime and that kind of stuff
1984 if os.path.exists(member.path):
1988 # set current volume number in tarobj, otherwise the extraction of the
1989 # file might fail when trying to extract a multivolume member
1990 index_data['tarobj'].volume_number = index_data['curr_vol_no']
# Dangling symlink targets are logged and skipped rather than fatal.
1992 def ignore_symlink (member, *_args):
1993 self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
1995 # finally, restore the file
1996 index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)
# Queue a directory member for the final permissions pass. When we cannot
# chown (non-root), store only name/mode/mtime; as root also record
# ownership data (note: issym is stored as the evaluated boolean).
1998 def add_member_dir(self, member):
2000 Add member dir to be restored at the end
2002 if not self.canchown:
2003 self._directories.append(DirItem(name=member.name, mode=member.mode,
2004 mtime=member.mtime))
2006 self._directories.append(DirItem(name=member.name, mode=member.mode,
2007 mtime=member.mtime, gname=member.gname, uname=member.uname,
2008 uid=member.uid, gid=member.gid, issym=member.issym()))
2010 class DirItem(object):
2011 def __init__(self, **kwargs):
2012 for k, v in kwargs.items():