3 # Copyright (C) 2013, 2014 Intra2net AG
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published
7 # by the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see
17 # <http://www.gnu.org/licenses/lgpl-3.0.html>
# On-disk format versions for encrypted backups: the PDTCRYPT header
# version written when encrypting, and the default crypto parameter set
# requested. Decryption does not need these; it reads them back from the
# file headers.
19 DELTATAR_HEADER_VERSION = 1
20 DELTATAR_PARAMETER_VERSION = 1
33 from functools import partial
# No-op logging handler attached to the library logger so that client code
# which configures no handler of its own does not trigger Python's
# "no handlers could be found" warning.
38 class NullHandler(logging.Handler):
39 def emit(self, record):
43 logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
51 # encryption direction; passed to initialize_encryption() to select
51 # between building a crypto.Encrypt or a crypto.Decrypt handler
52 CRYPTO_MODE_ENCRYPT = 0
53 CRYPTO_MODE_DECRYPT = 1
55 # The canonical extension for encrypted backup files regardless of the actual
56 # encryption parameters is “.pdtcrypt”. This is analogous to the encryption
57 # header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
58 # Since the introduction of the versioned header there is no longer any need
59 # for encoding encryption parameters in the file extensions (“.aes128” and
61 PDTCRYPT_EXTENSION = "pdtcrypt"
65 # roles of auxiliary (non-archive) files; each kind gets its own globally
65 # unique IV counter when encrypted (see open_auxiliary_file())
65 AUXILIARY_FILE_INDEX = 0
66 AUXILIARY_FILE_INFO = 1
68 class DeltaTar(object):
70 Backup class used to create backups
73 # list of files to exclude in the backup creation or restore operation. It
74 # can contain python regular expressions.
77 # list of files to include in the backup creation or restore operation. It
78 # can contain python regular expressions. If empty, all files in the source
79 # path will be backed up (when creating a backup) or all the files in the
80 # backup will be restored (when restoring a backup), but if included_files
81 # is set then only the files included in the list will be processed.
84 # custom filter of files to be backed up (or restored). Unused and unset
85 # by default. The function receives a file path and must return a boolean.
88 # mode in which the delta will be created (when creating a backup) or
89 # opened (when restoring). Accepts modes analogous to the tarfile library.
92 # used together with aes modes to encrypt and decrypt backups.
97 # parameter version to use when encrypting; note that this has no effect
98 # on decryption since the required settings are determined from the headers
99 crypto_version = DELTATAR_HEADER_VERSION
100 crypto_paramversion = None
102 # when encrypting or decrypting, these hold crypto handlers; created before
103 # establishing the Tarfile stream iff a password is supplied.
107 # python logger object.
110 # specifies the index mode in the same format as @param mode, but without
111 # the ':', '|' or '#' at the beginning. It doesn't make sense to specify
112 # that the index is encrypted if no password is given in the constructor.
115 # current time for this backup. Used for file names and file creation checks
118 # extra data to be included in the header of the index file when creating a
122 # valid tarfile modes and their corresponding default file extension
123 __file_extensions_dict = {
132 '#gz.pdtcrypt': '.gz',
137 # valid index modes and their corresponding default file extension
138 __index_extensions_dict = {
142 'gz.pdtcrypt': '.gz',
146 # valid path prefixes
147 __path_prefix_list = [
153 def __init__(self, excluded_files=[], included_files=[],
154 filter_func=None, mode="", password=None,
155 crypto_key=None, nacl=None,
156 crypto_version=DELTATAR_HEADER_VERSION,
157 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
158 logger=None, index_mode=None, index_name_func=None,
159 volume_name_func=None):
161 Constructor. Configures the diff engine.
164 - excluded_files: list of files to exclude in the backup creation or
165 restore operation. It can contain python regular expressions.
167 - included_files: list of files to include in the backup creation or
168 restore operation. It can contain python regular expressions. If
169 empty, all files in the source path will be backed up (when creating a
170 backup) or all the files in the backup will be restored (when
171 restoring a backup), but if included_files is set then only the files
172 included in the list will be processed.
174 - filter_func: custom filter of files to be backed up (or restored).
175 Unused and unset by default. The function receives a file path and
176 must return a boolean.
178 - mode: mode in which the delta will be created (when creating a backup)
179 or opened (when restoring). Accepts the same modes as the tarfile
180 library. Valid modes are:
183 ':' open uncompressed
184 ':gz' open with gzip compression
185 ':bz2' open with bzip2 compression
186 '|' open an uncompressed stream of tar blocks
187 '|gz' open a gzip compressed stream of tar blocks
188 '|bz2' open a bzip2 compressed stream of tar blocks
189 '#gz' open a stream of gzip compressed tar blocks
191 - crypto_key: used to encrypt and decrypt backups. Encryption will
192 be enabled automatically if a key is supplied. Requires a salt to be
195 - nacl: salt that was used to derive the encryption key for embedding
196 in the PDTCRYPT header. Not needed when decrypting and when
197 encrypting with password.
199 - password: used to encrypt and decrypt backups. Encryption will be
200 enabled automatically if a password is supplied.
202 - crypto_version: version of the format, determining the kind of PDT
205 - crypto_paramversion: optionally request encryption conforming to
206 a specific parameter version. Defaults to the standard PDT value
207 which as of 2017 is the only one available.
209 - logger: python logger object. Optional.
211 - index_mode: specifies the index mode in the same format as @param
212 mode, but without the ':', '|' or '#' at the beginning. If encryption
213 is requested it will extend to the auxiliary (index, info) files as
214 well. This is an optional parameter that will automatically mimic
215 @param mode by default if not provided. Valid modes are:
218 'gz' open with gzip compression
219 'bz2' open with bzip2 compression
221 - index_name_func: function that sets a custom name for the index file.
222 This function receives a flag to indicate whether the name will be
223 used for a full or diff backup. The backup path will be prepended to
226 - volume_name_func: function that defines the name of tar volumes. It
227 receives the backup_path, if it's a full backup and the volume number,
228 and must return the name for the corresponding volume name. Optional,
229 DeltaTar has default names for tar volumes.
232 if mode not in self.__file_extensions_dict:
233 raise Exception('Unrecognized extension mode=[%s] requested for files'
236 self.excluded_files = excluded_files
237 self.included_files = included_files
238 self.filter_func = filter_func
239 self.logger = logging.getLogger('deltatar.DeltaTar')
# NOTE(review): the addHandler call below is presumably guarded by an
# `if logger:` check since the logger parameter is documented as optional
# — confirm against upstream.
241 self.logger.addHandler(logger)
244 if crypto_key is not None:
245 self.crypto_key = crypto_key
246 self.nacl = nacl # encryption only
248 if password is not None:
249 self.password = password
251 if crypto_version is not None:
252 self.crypto_version = crypto_version
254 if crypto_paramversion is not None:
255 self.crypto_paramversion = crypto_paramversion
257 # generate index_mode
258 if index_mode is None:
264 elif mode not in self.__index_extensions_dict:
265 raise Exception('Unrecognized extension mode=[%s] requested for index'
268 self.index_mode = index_mode
269 self.current_time = datetime.datetime.now()
271 if index_name_func is not None:
# custom naming callbacks shadow the default bound methods of the same name
272 self.index_name_func = index_name_func
274 if volume_name_func is not None:
275 self.volume_name_func = volume_name_func
277 def pick_extension(self, kind, mode=None):
279 Choose the extension depending on a) the kind of file given, b) the
280 processing mode, and c) the current encryption settings.
283 if kind == PDT_TYPE_ARCHIVE:
286 mode = self.__index_extensions_dict [self.index_mode]
# append the canonical ".pdtcrypt" suffix whenever encryption is active
288 if self.crypto_key is not None or self.password is not None:
289 ret += "." + PDTCRYPT_EXTENSION
292 def index_name_func(self, is_full): # pylint: disable=method-hidden
294 Callback for setting a custom name for the index file. Depending on
295 whether *is_full* is set, it will create a suitable name for a full
298 prefix = "bfull" if is_full else "bdiff"
299 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
300 extension = self.pick_extension \
302 self.__index_extensions_dict [self.index_mode])
# e.g. "bfull-2017-01-31-1200.index.gz"
304 return "%s-%s.index%s" % (prefix, date_str, extension)
306 def volume_name_func(self, backup_path, # pylint: disable=method-hidden
307 is_full, volume_number,
310 function that defines the name of tar volumes. It receives the
311 backup_path, if it's a full backup and the volume number, and must return
312 the name for the corresponding volume name. Optional, DeltaTar has default
313 names for tar volumes.
315 If guess_name is activated, the file is intended not to be created but
316 to be found, and thus the date will be guessed.
318 prefix = "bfull" if is_full else "bdiff"
319 extension = self.pick_extension \
321 self.__file_extensions_dict [self.mode])
# normal (creation) path: embed the current timestamp in the name;
# volume numbers are 1-based in file names
324 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
325 return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
# guess_name path: scan the backup directory for a file matching
# prefix + any date + volume suffix instead of fabricating a name
327 prefix = prefix + "-"
328 postfix = "-%03d%s" % (volume_number + 1, extension)
329 for f in os.listdir(backup_path):
330 if f.startswith(prefix) and f.endswith(postfix):
332 raise Exception("volume not found")
335 def filter_path(self, path, source_path="", is_dir=None):
337 Filters a path, given the source_path, using the filtering properties
338 set in the constructor.
339 The filtering order is:
340 1. included_files (if any)
342 3. filter_func (which must return whether the file is accepted or not)
345 if len(source_path) > 0:
346 # ensure that exactly one '/' at end of dir is also removed
347 source_path = source_path.rstrip(os.sep) + os.sep
348 path = path[len(source_path):]
350 # 1. filter included_files
352 if len(self.included_files) > 0:
354 for i in self.included_files:
355 # it can be either a regexp or a string
356 if isinstance(i, str):
357 # if the string matches, then continue
362 # if the string ends with / it's a directory, and if the
363 # path is contained in it, it is included
364 if i.endswith('/') and path.startswith(i):
368 # if the string doesn't end with /, add it and do the same
370 elif path.startswith(i + '/'):
374 # check for PARENT_MATCH
377 if not dir_path.endswith('/'):
380 if i.startswith(dir_path):
383 # if it's a reg exp, then we just check if it matches
# NOTE(review): re._pattern_type was removed in Python 3.7; the modern
# public spelling is re.Pattern — flagging, not changing, since other
# call sites may share the idiom.
384 elif isinstance(i, re._pattern_type):
389 self.logger.warning('Invalid pattern in included_files: %s' % str(i))
391 if match == NO_MATCH:
394 # when a directory is in PARENT_MATCH, it doesn't matter if it's
395 # excluded. Its subfiles will be excluded, but the directory itself
397 if match != PARENT_MATCH:
398 for e in self.excluded_files:
399 # it can be either a regexp or a string
400 if isinstance(e, str):
401 # if the string matches, then exclude
405 # if the string ends with / it's a directory, and if the
406 # path starts with the directory, then exclude
407 if e.endswith('/') and path.startswith(e):
410 # if the string doesn't end with /, do the same check with
412 elif path.startswith(e + '/'):
415 # if it's a reg exp, then we just check if it matches
416 elif isinstance(e, re._pattern_type):
420 self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
# 3. finally defer to the user-supplied filter_func, if any
423 return self.filter_func(path)
427 def _recursive_walk_dir(self, source_path, keep_base_dir=False):
429 Walk a directory recursively, yielding each file/directory
431 Returns the path of an entity. If ``keep_base_dir`` is set,
432 the path returned contains the prefix ``source_path``; otherwise it is
433 relative to the prefix.
436 source_path = source_path.rstrip(os.sep)
441 beginning_size = len(source_path) + 1 # +1 for os.sep
# breadth-first traversal using an explicit FIFO queue of directories
443 queue = [source_path]
446 cur_path = queue.pop(0)
448 dfd = os.open (cur_path, os.O_DIRECTORY)
449 if dfd == -1: # it might have been removed in the meantime
# sorted() gives a deterministic ordering, which the diff/collate
# logic relies on when comparing against a previous index
453 for filename in sorted(os.listdir(dfd)):
454 child = os.path.join(cur_path, filename)
455 is_dir = os.path.isdir(child)
456 status = self.filter_path(child, source_path, is_dir)
457 if status == NO_MATCH:
459 if not os.access(child, os.R_OK):
460 self.logger.warning('Error accessing possibly locked file %s' % child)
464 yield child[beginning_size:]
# descend into matched directories (and PARENT_MATCH dirs, whose
# children may still match an included pattern)
466 if is_dir and (status == MATCH or status == PARENT_MATCH):
471 def _stat_dict(self, path):
473 Returns a dict with the stat data used to compare files
475 stinfo = os.stat(path)
476 mode = stinfo.st_mode
# classify the entry by its stat mode bits
479 if stat.S_ISDIR(mode):
481 elif stat.S_ISREG(mode):
483 elif stat.S_ISLNK(mode):
# mtime/ctime truncated to whole seconds for stable JSON comparison
490 u'mtime': int(stinfo.st_mtime),
491 u'ctime': int(stinfo.st_ctime),
492 u'uid': stinfo.st_uid,
493 u'gid': stinfo.st_gid,
494 u'inode': stinfo.st_ino,
495 u'size': stinfo.st_size
498 def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
500 Return if the dicts are equal in the stat keys
502 keys = [u'type', u'mode',u'size', u'mtime',
503 # not restored: u'inode', u'ctime'
506 # only if user is root, then also check gid/uid. otherwise do not check it,
507 # because tarfile can chown in case of being superuser only
509 # also, skip the check in rpmbuild since the sources end up with the
510 # uid:gid of the packager while the extracted files are 0:0.
511 if hasattr(os, "geteuid") and os.geteuid() == 0 \
512 and os.getenv ("RPMBUILD_OPTIONS") is None:
# a missing dict on either side means the entries cannot be equal
516 if (not d1 and d2 != None) or (d1 != None and not d2):
# compare the (normalized) prefixed paths; sentinel defaults -1/-2
# guarantee inequality when a 'path' key is absent on one side
519 if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
522 type = d1.get('type', '')
525 # size doesn't matter for directories
526 if type == 'directory' and key == 'size':
528 if d1.get(key, -1) != d2.get(key, -2):
532 def prefixed(self, path, listsnapshot_equal=False):
534 if a path is not prefixed, return it prefixed
536 for prefix in self.__path_prefix_list:
537 if path.startswith(prefix):
# with listsnapshot_equal, treat list:// entries as snapshot://
# so unchanged files compare equal across diff backups
538 if listsnapshot_equal and prefix == u'list://':
539 return u'snapshot://' + path[len(prefix):]
# NOTE(review): an already-prefixed path is presumably returned
# unchanged when the list://→snapshot:// rewrite does not apply —
# confirm the fall-through against upstream.
541 return u'snapshot://' + path
543 def unprefixed(self, path):
545 remove a path prefix if any
547 for prefix in self.__path_prefix_list:
548 if path.startswith(prefix):
549 return path[len(prefix):]
553 def initialize_encryption (self, mode):
554 password = self.password
555 key = self.crypto_key
# no credentials → no crypto handler (plaintext operation)
558 if key is None and password is None:
560 if mode == CRYPTO_MODE_ENCRYPT:
561 return crypto.Encrypt (password=password,
564 version=self.crypto_version,
565 paramversion=self.crypto_paramversion)
566 if mode == CRYPTO_MODE_DECRYPT:
567 return crypto.Decrypt (password=password, key=key)
# any other mode value is a programming error
569 raise Exception ("invalid encryption mode [%r]" % mode)
572 def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
574 Given the specified configuration, opens a file for reading or writing,
575 inheriting the encryption and compression settings from the backup.
576 Returns a file object ready to use.
578 :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
581 :param kind: Role of the file, see AUXILIARY_FILE_* constants.
582 Both the info and the auxiliary file have a globally
583 unique, constant counter value.
# derive the stream compression type from the configured index mode
586 if self.index_mode.startswith('gz'):
588 elif self.index_mode.startswith('bz2'):
# build the crypto handler matching the IO direction
596 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
598 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
600 if crypto_ctx is not None:
# each auxiliary file kind uses its own fixed AES-GCM IV counter so
# that index and info files never share an IV with archive volumes
601 if kind == AUXILIARY_FILE_INFO:
602 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
603 elif kind == AUXILIARY_FILE_INDEX:
604 enccounter = crypto.AES_GCM_IV_CNT_INDEX
606 raise Exception ("invalid kind of aux file %r" % kind)
608 sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
609 bufsize=tarfile.RECORDSIZE, fileobj=None,
610 encryption=crypto_ctx, enccounter=enccounter)
615 def create_full_backup(self, source_path, backup_path,
616 max_volume_size=None, extra_data=dict()):
618 Creates a full backup.
621 - source_path: source path to the directory to back up.
622 - backup_path: path where the back up will be stored. Backup path will
623 be created if not existent.
624 - max_volume_size: maximum volume size in megabytes. Used to split the
625 backup in volumes. Optional (won't split in volumes by default).
626 - extra_data: a json-serializable dictionary with information that you
627 want to be included in the header of the index file
# ---- input validation -------------------------------------------------
630 if not isinstance(source_path, str):
631 raise Exception('Source path must be a string')
633 if not isinstance(backup_path, str):
634 raise Exception('Backup path must be a string')
636 if not os.path.exists(source_path) or not os.path.isdir(source_path):
637 raise Exception('Source path "%s" does not exist or is not a '\
638 'directory' % source_path)
640 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
641 max_volume_size < 1):
642 raise Exception('max_volume_size must be a positive integer')
643 if max_volume_size != None:
# convert megabytes (API unit) to bytes (tarfile unit)
644 max_volume_size = max_volume_size*1024*1024
646 if not isinstance(extra_data, dict):
647 raise Exception('extra_data must be a dictionary')
650 extra_data_str = json.dumps(extra_data)
652 raise Exception('extra_data is not json-serializable')
654 if not os.access(source_path, os.R_OK):
655 raise Exception('Source path "%s" is not readable' % source_path)
657 # try to create backup path if needed
658 if not os.path.exists(backup_path):
659 os.makedirs(backup_path)
661 if not os.access(backup_path, os.W_OK):
662 raise Exception('Backup path "%s" is not writeable' % backup_path)
664 if source_path.endswith('/'):
665 source_path = source_path[:-1]
667 if backup_path.endswith('/'):
668 backup_path = backup_path[:-1]
670 # update current time
671 self.current_time = datetime.datetime.now()
673 if self.mode not in self.__file_extensions_dict:
674 raise Exception('Unrecognized extension')
676 # setup for encrypting payload
677 if self.encryptor is None:
678 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
680 # some initialization
683 # generate the first volume name
684 vol_name = self.volume_name_func(backup_path, True, 0)
685 tarfile_path = os.path.join(backup_path, vol_name)
# open the index sink with the backup's compression/encryption settings
688 index_name = self.index_name_func(True)
689 index_path = os.path.join(backup_path, index_name)
690 index_sink = self.open_auxiliary_file(index_path, 'w')
694 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
696 Handles the new volumes
698 volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
699 volume_path = os.path.join(backup_path, volume_name)
700 deltarobj.vol_no = volume_number
702 # we convert relative paths into absolute because CWD is changed
703 if not os.path.isabs(volume_path):
704 volume_path = os.path.join(cwd, volume_path)
706 if tarobj.fileobj is not None:
707 tarobj.fileobj.close()
709 deltarobj.logger.debug("opening volume %s" % volume_path)
711 tarobj.open_volume(volume_path, encryption=encryption)
713 # wraps some args from context into the handler
714 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
# index header line identifying format, version and backup type
716 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
718 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
719 # calculate checksum and write into the stream
720 crc = binascii.crc32(s) & 0xFFFFffff
723 # start creating the tarfile
724 tarobj = tarfile.TarFile.open(tarfile_path,
725 mode='w' + self.mode,
726 format=tarfile.GNU_FORMAT,
727 concat='#' in self.mode,
728 encryption=self.encryptor,
729 max_volume_size=max_volume_size,
730 new_volume_handler=new_volume_handler,
731 save_to_members=False,
# walk relative to the source so archive names are relative paths
733 os.chdir(source_path)
735 # for each file to be in the backup, do:
736 for path in self._recursive_walk_dir('.'):
737 # calculate stat dict for current file
738 statd = self._stat_dict(path)
739 statd['path'] = u'snapshot://' + statd['path']
740 statd['volume'] = self.vol_no
745 tarobj.add(path, arcname = statd['path'], recursive=False)
746 except FileNotFoundError as exn:
747 # file vanished since the call to access(3) above
748 self.logger.warning ("object [%s] no longer available in "
749 "file system (error: %s); skipping"
751 continue # prevent indexing
753 # retrieve file offset
754 statd['offset'] = tarobj.get_last_member_offset()
755 self.logger.debug("backup %s" % statd['path'])
757 # store the stat dict in the index
758 s = bytes(json.dumps(statd) + '\n', 'UTF-8')
759 crc = binascii.crc32(s, crc) & 0xffffffff
# terminate the file list and append the running CRC32 as a trailer so
# the index can be integrity-checked on restore
762 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
763 crc = binascii.crc32(s, crc) & 0xffffffff
765 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
770 index_sink.close (close_fileobj=True)
772 def create_diff_backup(self, source_path, backup_path, previous_index_path,
773 max_volume_size=None, extra_data=dict()):
778 - source_path: source path to the directory to back up.
779 - backup_path: path where the back up will be stored. Backup path will
780 be created if not existent.
781 - previous_index_path: index of the previous backup, needed to know
782 which files changed since then.
783 - max_volume_size: maximum volume size in megabytes (MB). Used to split
784 the backup in volumes. Optional (won't split in volumes by default).
786 NOTE: previous index is assumed to follow exactly the same format as
787 the index_mode setup in the constructor.
789 # check/sanitize input
790 if not isinstance(source_path, str):
791 raise Exception('Source path must be a string')
793 if not isinstance(backup_path, str):
794 raise Exception('Backup path must be a string')
796 if not os.path.exists(source_path) or not os.path.isdir(source_path):
797 raise Exception('Source path "%s" does not exist or is not a '\
798 'directory' % source_path)
800 if not isinstance(extra_data, dict):
801 raise Exception('extra_data must be a dictionary')
804 extra_data_str = json.dumps(extra_data)
806 raise Exception('extra_data is not json-serializable')
808 if not os.access(source_path, os.R_OK):
809 raise Exception('Source path "%s" is not readable' % source_path)
811 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
812 max_volume_size < 1):
813 raise Exception('max_volume_size must be a positive integer')
814 if max_volume_size != None:
# convert megabytes (API unit) to bytes (tarfile unit)
815 max_volume_size = max_volume_size*1024*1024
817 if not isinstance(previous_index_path, str):
818 raise Exception('previous_index_path must be A string')
820 if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
821 raise Exception('Index path "%s" does not exist or is not a '\
822 'file' % previous_index_path)
824 if not os.access(previous_index_path, os.R_OK):
825 raise Exception('Index path "%s" is not readable' % previous_index_path)
827 # try to create backup path if needed
828 if not os.path.exists(backup_path):
829 os.makedirs(backup_path)
831 if not os.access(backup_path, os.W_OK):
832 raise Exception('Backup path "%s" is not writeable' % backup_path)
834 if source_path.endswith('/'):
835 source_path = source_path[:-1]
837 if backup_path.endswith('/'):
838 backup_path = backup_path[:-1]
840 # update current time
841 self.current_time = datetime.datetime.now()
843 if self.mode not in self.__file_extensions_dict:
844 raise Exception('Unrecognized extension')
846 # setup for encrypting payload
847 if self.encryptor is None:
848 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
850 # some initialization
853 # generate the first volume name
854 vol_name = self.volume_name_func(backup_path, is_full=False,
856 tarfile_path = os.path.join(backup_path, vol_name)
861 index_name = self.index_name_func(is_full=False)
862 index_path = os.path.join(backup_path, index_name)
863 index_sink = self.open_auxiliary_file(index_path, 'w')
865 def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
867 Handles the new volumes
869 volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
870 volume_number=volume_number)
871 volume_path = os.path.join(backup_path, volume_name)
872 deltarobj.vol_no = volume_number
874 # we convert relative paths into absolute because CWD is changed
875 if not os.path.isabs(volume_path):
876 volume_path = os.path.join(cwd, volume_path)
878 deltarobj.logger.debug("opening volume %s" % volume_path)
879 tarobj.open_volume(volume_path)
881 # wraps some args from context into the handler
882 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
# index header line identifying format, version and backup type
884 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
886 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
887 # calculate checksum and write into the stream
888 crc = binascii.crc32(s) & 0xFFFFffff
891 # start creating the tarfile
892 tarobj = tarfile.TarFile.open(tarfile_path,
893 mode='w' + self.mode,
894 format=tarfile.GNU_FORMAT,
895 concat='#' in self.mode,
896 encryption=self.encryptor,
897 max_volume_size=max_volume_size,
898 new_volume_handler=new_volume_handler,
899 save_to_members=False,
903 # create the iterators, first the previous index iterator, then the
904 # source path directory iterator and collate and iterate them
905 if not os.path.isabs(previous_index_path):
906 previous_index_path = os.path.join(cwd, previous_index_path)
907 index_it = self.iterate_index_path(previous_index_path)
909 os.chdir(source_path)
910 dir_it = self._recursive_walk_dir('.')
911 dir_path_it = self.jsonize_path_iterator(dir_it)
919 # for each file to be in the backup, do:
920 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
922 # if file is not in the index, it means it's a new file, so we have
927 # if the file is not in the directory iterator, it means that it has
928 # been deleted, so we need to mark it as such
931 # if the file is in both iterators, it means it might have either
932 # not changed (in which case we will just list it in our index but
933 # it will not be included in the tar file), or it might have
934 # changed, in which case we will snapshot it.
935 elif ipath and dpath:
936 if self._equal_stat_dicts(ipath, dpath):
940 # TODO: when creating chained backups (i.e. diffing from another
941 # diff), we will need to detect the type of action in the previous
942 # index, because if it was delete and dpath is None, we should
945 if action == 'snapshot':
946 # calculate stat dict for current file
948 stat['path'] = "snapshot://" + dpath['path']
949 stat['volume'] = self.vol_no
951 self.logger.debug("[STORE] %s" % dpath['path'])
954 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
955 # retrieve file offset
956 stat['offset'] = tarobj.get_last_member_offset()
957 except FileNotFoundError as exn:
958 # file vanished since the call to access(3) above
959 self.logger.warning ("object [%s] no longer available in "
960 "file system (error: %s); skipping"
961 % (dpath ["path"], str (exn)))
962 stat = None # prevent indexing
964 elif action == 'delete':
965 path = self.unprefixed(ipath['path'])
967 u'path': u'delete://' + path,
968 u'type': ipath['type']
970 self.logger.debug("[DELETE] %s" % path)
972 # mark it as deleted in the backup
# /dev/null content stands in for the removed file in the archive
973 tarobj.add("/dev/null", arcname=stat['path'])
974 elif action == 'list':
976 path = self.unprefixed(ipath['path'])
977 stat['path'] = u'list://' + path
978 # unchanged files do not enter in the backup, only in the index
979 self.logger.debug("[UNCHANGED] %s" % path)
982 self.logger.warning('unknown action in create_diff_backup: {0}'
987 # store the stat dict in the index
988 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
989 crc = binascii.crc32(s, crc) & 0xffffffff
# terminate the file list and append the running CRC32 trailer
992 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
993 crc = binascii.crc32(s, crc) & 0xffffffff
995 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
1004 def iterate_index_path(self, index_path):
1006 Returns an index iterator. Internally, it uses a classic iterator class.
1007 We do that instead of just yielding so that the iterator object can have
1008 an additional function to close the file descriptor that is opened in
1012 class IndexPathIterator(object):
1013 def __init__(self, delta_tar, index_path):
1014 self.delta_tar = delta_tar
1015 self.index_path = index_path
1017 self.extra_data = dict()
1027 def __enter__(self):
1029 Allows this iterator to be used with the "with" statement
# lazily open the index with the backup's compression/encryption
1032 self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
1033 # check index header
1034 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1035 if j.get("type", '') != 'python-delta-tar-index' or\
1036 j.get('version', -1) != 1:
1037 raise Exception("invalid index file format: %s" % json.dumps(j))
1039 self.extra_data = j.get('extra_data', dict())
1041 # find BEGIN-FILE-LIST, ignore other headers
1043 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1044 if j.get('type', '') == 'BEGIN-FILE-LIST':
1048 def __exit__(self, type, value, tb):
1050 Allows this iterator to be used with the "with" statement
1057 # read each file in the index and process it to do the restore
1061 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1062 except Exception as e:
1067 op_type = j.get('type', '')
1069 # when we detect the end of the list, break the loop
1070 if op_type == 'END-FILE-LIST':
1076 if op_type not in ['directory', 'file', 'link']:
1077 self.delta_tar.logger.warning('unrecognized type to be '
1078 'restored: %s, line %d' % (op_type, l_no))
# skip unknown entries and keep iterating
1080 return self.__next__()
1084 return IndexPathIterator(self, index_path)
1086 def iterate_tar_path(self, tar_path, new_volume_handler=None):
1088 Returns a tar iterator that iterates jsonized member items that contain
1089 an additional "member" field, used by RestoreHelper.
1091 class TarPathIterator(object):
1092 def __init__(self, delta_tar, tar_path, new_volume_handler=None):
1093 self.delta_tar = delta_tar
1094 self.tar_path = tar_path
1096 self.last_member = None
1097 self.new_volume_handler = new_volume_handler
1105 self.tar_obj.close()
1107 def __enter__(self):
1109 Allows this iterator to be used with the "with" statement
1111 if self.tar_obj is None:
# build a decryptor only when credentials were configured
1113 if self.delta_tar.password is not None:
1114 decryptor = crypto.Decrypt \
1115 (password=self.delta_tar.password,
1116 key=self.delta_tar.crypto_key)
1117 self.tar_obj = tarfile.TarFile.open(self.tar_path,
1118 mode='r' + self.delta_tar.mode,
1119 format=tarfile.GNU_FORMAT,
1120 concat='#' in self.delta_tar.mode,
1121 encryption=decryptor,
1122 new_volume_handler=self.new_volume_handler,
1123 save_to_members=False,
1127 def __exit__(self, type, value, tb):
1129 Allows this iterator to be used with the "with" statement
1132 self.tar_obj.close()
1137 Read each member and return it as a stat dict
1139 tarinfo = self.tar_obj.__iter__().__next__()
1140 # NOTE: here we compare if tarinfo.path is the same as before
1141 # instead of comparing the tarinfo object itself because the
1142 # object itself might change for multivol tarinfos
1143 if tarinfo is None or (self.last_member is not None and\
1144 self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
1147 self.last_member = tarinfo
# map tarinfo member kinds onto the index 'type' vocabulary
1150 if tarinfo.isfile():
1152 elif tarinfo.isdir():
1154 elif tarinfo.islnk() or tarinfo.issym():
1159 u'path': tarinfo.path,
1160 u'mode': tarinfo.mode,
1161 u'mtime': tarinfo.mtime,
1162 u'ctime': -1, # cannot restore
1163 u'uid': tarinfo.uid,
1164 u'gid': tarinfo.gid,
1165 u'inode': -1, # cannot restore
1166 u'size': tarinfo.size,
1170 return TarPathIterator(self, tar_path, new_volume_handler)
1172 def jsonize_path_iterator(self, iter, strip=0):
1174 converts the yielded items of an iterator into json path lines.
1176 strip: Strip the smallest prefix containing num leading slashes from
# NOTE: the parameter name `iter` shadows the builtin; kept for
# interface compatibility with existing callers.
1181 path = iter.__next__()
1183 yield self._stat_dict(path), 0
1185 st = self._stat_dict(path)
# drop the first `strip` path components before indexing
1186 st['path'] = "/".join(path.split("/")[strip:])
1188 except StopIteration:
def iterate_disaster_index (self, index):
    '''
    Mimick the behavior of the other object iterators, just with the inputs
    supplied directly as *index*.
    '''
    class RawIndexIterator(object):
        '''Wraps a pre-parsed index list in the common iterator protocol.'''

        def __init__(self, delta_tar, index):
            self.delta_tar = delta_tar
            self.index = index
            # prime the underlying iterator so the object is usable with
            # or without a `with` block
            self.__enter__()

        def __iter__(self):
            return self

        def __enter__(self):
            '''
            Allows this iterator to be used with the "with" statement
            '''
            self.iter = self.index.__iter__ ()
            return self

        def __exit__(self, type, value, tb):
            '''
            Allows this iterator to be used with the "with" statement
            '''
            pass

        def __next__(self):
            idxent = self.iter.__next__ ()
            # dummy line number 0, matching the other index iterators
            return idxent, 0

    return RawIndexIterator(self, index)
def collate_iterators(self, it1, it2):
    '''
    Collate two iterators, so that it returns pairs of the items of each
    iterator (if the items are the same), or (None, elem2) or (elem1, None)
    when there's no match for the items in the other iterator.

    It assumes that the items in both lists are ordered in the same way.

    *it1* yields ``(element, line_number)`` pairs, *it2* yields elements
    directly (or tuples whose first item is the element).  Yields triples
    ``(elem1_or_None, elem2_or_None, line_number)``.
    '''
    # FIX: initialize l_no -- previously, if it1 was exhausted on the very
    # first call, the StopIteration handler below referenced l_no before
    # assignment and raised NameError.
    l_no = 0
    elem1, elem2 = None, None
    while True:
        if not elem1:
            try:
                elem1, l_no = it1.__next__()
            except StopIteration:
                # it1 is done: flush the pending elem2, then drain it2
                if elem2:
                    yield (None, elem2, l_no)
                for elem2 in it2:
                    if isinstance(elem2, tuple):
                        elem2 = elem2[0]
                    yield (None, elem2, l_no)
                break
        if not elem2:
            try:
                elem2 = it2.__next__()
                if isinstance(elem2, tuple):
                    elem2 = elem2[0]
            except StopIteration:
                # it2 is done: flush the pending elem1, then drain it1
                if elem1:
                    yield (elem1, None, l_no)
                for elem1, l_no in it1:
                    yield (elem1, None, l_no)
                break

        index1 = self.unprefixed(elem1['path'])
        index2 = self.unprefixed(elem2['path'])
        i1, i2 = self.compare_indexes(index1, index2)

        # only the element(s) whose index matched are consumed this round
        yield1 = yield2 = None
        if i1 is not None:
            yield1 = elem1
            elem1 = None
        if i2 is not None:
            yield2 = elem2
            elem2 = None
        yield (yield1, yield2, l_no)
def compare_indexes(self, index1, index2):
    '''
    Compare iterator indexes and return a tuple in the following form:
    if index1 < index2, returns (index1, None)
    if index1 == index2 returns (index1, index2)
    else: returns (None, index2)
    '''
    l1 = index1.split('/')
    l2 = index2.split('/')

    # shorter paths sort first: a path with fewer components always
    # precedes anything nested more deeply
    depth_diff = len(l2) - len(l1)
    if depth_diff > 0:
        return (index1, None)
    if depth_diff < 0:
        return (None, index2)

    # equal depth: order component-wise, lexicographically
    for c1, c2 in zip(l1, l2):
        if c1 < c2:
            return (index1, None)
        if c1 > c2:
            return (None, index2)

    return (index1, index2)
def list_backup(self, backup_tar_path, list_func=None):
    '''
    List the members of the backup archive at *backup_tar_path*.

    Each member is passed to *list_func*; when no callback is supplied
    the member path is written to the logger instead.
    '''
    if not isinstance(backup_tar_path, str):
        raise Exception('Backup tar path must be a string')

    if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
        raise Exception('Source path "%s" does not exist or is not a '\
                        'file' % backup_tar_path)

    if not os.access(backup_tar_path, os.R_OK):
        raise Exception('Source path "%s" is not readable' % backup_tar_path)

    cwd = os.getcwd()

    def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
        '''
        Handles the new volumes
        '''
        volume_name = deltarobj.volume_name_func(backup_path, True,
            volume_number, guess_name=True)
        volume_path = os.path.join(backup_path, volume_name)

        # we convert relative paths into absolute because CWD is changed
        if not os.path.isabs(volume_path):
            volume_path = os.path.join(cwd, volume_path)
        tarobj.open_volume(volume_path, encryption=encryption)

    if self.decryptor is None:
        self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)

    backup_path = os.path.dirname(backup_tar_path)
    if not os.path.isabs(backup_path):
        backup_path = os.path.join(cwd, backup_path)
    new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)

    tarobj = tarfile.TarFile.open(backup_tar_path,
                        mode='r' + self.mode,
                        format=tarfile.GNU_FORMAT,
                        concat='#' in self.mode,
                        encryption=self.decryptor,
                        new_volume_handler=new_volume_handler,
                        save_to_members=False,
                        )

    def filter(cls, list_func, tarinfo):
        # invoked for every member; report it and skip actual extraction
        # NOTE(review): reconstructed -- confirm against upstream whether
        # the filter returns None (skip) or the tarinfo here.
        if list_func is None:
            self.logger.info(tarinfo.path)
        else:
            list_func(tarinfo)
        return None
    filter = partial(filter, self, list_func)

    tarobj.extractall(filter=filter)
    tarobj.close()
def restore_backup(self, target_path, backup_indexes_paths=[],
                   backup_tar_path=None, restore_callback=None,
                   disaster=tarfile.TOLERANCE_STRICT, backup_index=None):
    '''
    Restores a backup.

    Parameters:
    - target_path: path to restore.
    - backup_indexes_paths: path to backup indexes, in descending date order.
      The indexes indicate the location of their respective backup volumes,
      and multiple indexes are needed to be able to restore diff backups.
      Note that this is an optional parameter: if not suplied, it will
      try to restore directly from backup_tar_path.
    - backup_tar_path: path to the backup tar file. Used as an alternative
      to backup_indexes_paths to restore directly from a tar file without
      using any file index. If it's a multivol tarfile, volume_name_func
      will be called.
    - restore_callback: callback function to be called during restore.
      This is passed to the helper and gets called for every file.
    - disaster: error-tolerance policy, one of the tarfile.TOLERANCE_*
      constants.
    - backup_index: pre-parsed index entries (disaster/rescue mode); when
      given as a list it takes precedence over the other input sources.

    NOTE: If you want to use an index to restore a backup, this function
    only supports to do so when the tarfile mode is either uncompressed or
    uses concat compress mode, because otherwise it would be very slow.

    NOTE: Indices are assumed to follow the same format as the index_mode
    specified in the constructor.

    Returns the list of files that could not be restored, if there were
    any.
    '''
    # check/sanitize input
    if not isinstance(target_path, str):
        raise Exception('Target path must be a string')

    # FIX: the previous check (`backup_indexes_paths is None and
    # backup_tar_path == []`) had its operands swapped and could never
    # trigger for the documented misuse; require at least one input source.
    if not backup_indexes_paths and not backup_tar_path \
            and backup_index is None:
        raise Exception("You have to either provide index paths or a tar path")

    if isinstance (backup_index, list) is True:
        mode = "disaster"
    elif len(backup_indexes_paths) == 0:
        mode = "tar"
    else:
        mode = "diff"

    if mode == "tar":
        if not isinstance(backup_tar_path, str):
            raise Exception('Backup tar path must be a string')

        if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
            raise Exception('Source path "%s" does not exist or is not a '\
                            'file' % backup_tar_path)

        if not os.access(backup_tar_path, os.R_OK):
            raise Exception('Source path "%s" is not readable' % backup_tar_path)
    else:
        if not isinstance(backup_indexes_paths, list):
            raise Exception('backup_indexes_paths must be a list')

        if self.mode.startswith(':') or self.mode.startswith('|'):
            raise Exception('Restore only supports either uncompressed tars'
                ' or concat compression when restoring from an index, and '
                ' the open mode you provided is "%s"' % self.mode)

        for index in backup_indexes_paths:
            if not isinstance(index, str):
                raise Exception('indices must be strings')

            if not os.path.exists(index) or not os.path.isfile(index):
                raise Exception('Index path "%s" does not exist or is not a '\
                                'file' % index)

            if not os.access(index, os.R_OK):
                raise Exception('Index path "%s" is not readable' % index)

    # try to create backup path if needed (exist_ok avoids a TOCTOU race
    # against the previous exists()-then-makedirs() sequence)
    os.makedirs(target_path, exist_ok=True)

    # make backup_tar_path absolute so that iterate_tar_path works fine
    if backup_tar_path and not os.path.isabs(backup_tar_path):
        backup_tar_path = os.path.abspath(backup_tar_path)

    cwd = os.getcwd()
    os.chdir(target_path)

    # setup for decrypting payload
    if self.decryptor is None:
        self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)

    if mode == 'tar':
        index_it = self.iterate_tar_path(backup_tar_path)
        helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
                               tarobj=index_it.tar_obj)
    elif mode == "diff":
        helper = RestoreHelper(self, cwd, backup_indexes_paths,
                               disaster=disaster)
        try:
            # get iterator from newest index at _data[0]
            index1 = helper._data[0]["path"]
            index_it = self.iterate_index_path(index1)
        except tarfile.DecryptionError as exn:
            self.logger.error("failed to decrypt file [%s]: %s; is this an "
                              "actual encrypted index file?"
                              % (index1, str (exn)))
            return [(index1, exn)]
        except Exception as exn:
            # compressed files
            self.logger.error("failed to read file [%s]: %s; is this an "
                              "actual index file?" % (index1, str (exn)))
            return [(index1, exn)]
    elif mode == "disaster":
        index_it = self.iterate_disaster_index (backup_index)
        helper = RestoreHelper (self, cwd, backup_path=backup_tar_path,
                                backup_index=backup_index,
                                disaster=disaster)

    # iterator over what currently exists in the target directory, used to
    # detect files that must be deleted because they are not in the index
    dir_it = self._recursive_walk_dir('.')
    dir_path_it = self.jsonize_path_iterator(dir_it)

    failed = []         # irrecoverable files

    # for each file to be restored, do:
    for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
        if not ipath:
            upath = dpath['path']
            op_type = dpath['type']
        else:
            upath = self.unprefixed(ipath['path'])
            op_type = ipath['type']

        # filter paths
        if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
            continue

        # if types of the file mismatch, the file needs to be deleted
        # and then restored
        if ipath is not None and dpath is not None and\
           dpath['type'] != ipath['type']:
            helper.delete(upath)

        # if file not found in dpath, we can directly restore from index
        if not dpath:
            # if the file doesn't exist and it needs to be deleted, it
            # means that work is already done
            if ipath['path'].startswith('delete://'):
                continue
            try:
                self.logger.debug("restore %s" % ipath['path'])
                helper.restore(ipath, l_no, restore_callback)
            except Exception as e:
                iipath = ipath.get ("path", "")
                self.logger.error("FAILED to restore: {} ({})"
                                  .format(iipath, e))
                if disaster != tarfile.TOLERANCE_STRICT:
                    failed.append ((iipath, e))
            continue

        # if both files are equal, we have nothing to restore
        if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
            continue

        # we have to restore the file, but first we need to delete the
        # current existing file.
        # we don't delete the file if it's a directory, because it might
        # just have changed mtime, so it's quite inefficient to remove
        # it
        if ipath:
            if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
                helper.delete(upath)
            self.logger.debug("restore %s" % ipath['path'])
            try:
                helper.restore(ipath, l_no, restore_callback)
            except Exception as e:
                if disaster == tarfile.TOLERANCE_STRICT:
                    raise
                failed.append ((ipath.get ("path", ""), e))
            continue

        # if the file is not in the index (so it comes from the target
        # directory) then we have to delete it
        self.logger.debug("delete %s" % upath)
        helper.delete(upath)

    helper.restore_directories_permissions()
    os.chdir(cwd)
    helper.cleanup()

    return failed
def recover_backup(self, target_path, backup_indexes_paths=[],
                   restore_callback=None):
    '''
    Walk the index, extracting objects in disaster mode. Bad files are
    reported along with a reason.

    Returns the list of files that could not be restored (see
    restore_backup).
    '''
    # FIX: restore_callback used to be accepted but silently dropped;
    # it is now forwarded so callers get their per-file notifications.
    return self.restore_backup(target_path,
                               backup_indexes_paths=backup_indexes_paths,
                               restore_callback=restore_callback,
                               disaster=tarfile.TOLERANCE_RECOVER)
def rescue_backup(self, target_path, backup_tar_path,
                  restore_callback=None):
    '''
    More aggressive “unfsck” mode: do not rely on the index data as the
    files may be corrupt; skim files for header-like information and
    attempt to retrieve the data.

    Returns the list of files that could not be restored (see
    restore_backup).
    '''
    def gen_volume_name (nvol):
        # map a volume number to the full path of that volume file
        return os.path.join (os.path.dirname (backup_tar_path),
                             self.volume_name_func (backup_tar_path,
                                                    True,
                                                    nvol))

    # scan the volumes themselves for usable object headers
    backup_index = tarfile.gen_rescue_index (gen_volume_name,
                                             self.mode,
                                             password=self.password,
                                             key=self.crypto_key)

    # FIX: restore_callback used to be accepted but silently dropped;
    # it is now forwarded so callers get their per-file notifications.
    return self.restore_backup(target_path,
                               backup_index=backup_index,
                               backup_tar_path=backup_tar_path,
                               restore_callback=restore_callback,
                               disaster=tarfile.TOLERANCE_RESCUE)
1580 def _parse_json_line(self, f, l_no):
1582 Read line from file like object and process it as JSON.
1587 j = json.loads(l.decode('UTF-8'))
1588 except UnicodeDecodeError as e:
1589 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1591 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1592 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1595 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1596 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1598 except ValueError as e:
1599 raise Exception("error parsing this json line "
1600 "(line number %d): %s" % (l_no, l))
class RestoreHelper(object):
    """
    Class used to help to restore files from indices
    """
    # holds the dicts of data
    # NOTE(review): the listing this was recovered from appears to omit the
    # class-level defaults the comments refer to (e.g. _data, _deltatar,
    # _cwd, _directories) -- confirm against upstream before relying on
    # this header alone.

    # list of directories to be restored. This is done as a last step, see
    # tarfile.extractall for details.

    # default error-tolerance policy; overridden per instance in __init__
    _disaster = tarfile.TOLERANCE_STRICT
def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
             backup_index=None, tarobj=None,
             disaster=tarfile.TOLERANCE_STRICT):
    '''
    Constructor opens the tars and init the data structures.

    Assumptions:

        - Index list must be provided in reverse order (newer first).
        - “newer first” apparently means that if there are n backups
          provided, the last full backup is at index n-1 and the most
          recent diff backup is at index 0.
        - Only the first, the second, and the last elements of
          ``index_list`` are relevant, others will not be accessed.
        - If no ``index_list`` is provided, both ``tarobj`` and
          ``backup_path`` must be passed.
        - If ``index_list`` is provided, the values of ``tarobj`` and
          ``backup_path`` are ignored.
    '''
    self._data = []
    self._directories = []
    self._deltatar = deltatar
    self._cwd = cwd
    self._password = deltatar.password
    self._crypto_key = deltatar.crypto_key
    self._decryptors = []
    self._disaster = disaster

    # changing ownership is a privileged operation; remember whether we
    # may attempt it so restore_directories_permissions can skip chown
    try:
        import grp, pwd
    except ImportError:
        grp = pwd = None

    if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
        self.canchown = True
    else:
        self.canchown = False

    if isinstance (backup_index, list) is True:
        # disaster/rescue mode: a single pre-parsed index was handed in
        decryptor = self._deltatar.decryptor
        self._data = \
        [{ "curr_vol_no" : None
         , "vol_fd" : None
         , "offset" : -1
         , "tarobj" : None
         , "path" : backup_path
         , "is_full" : True
         # NOTE(review): reconstructed from a mangled listing; confirm
         # the "iterator" initial value against upstream.
         , "iterator" : None
         , "last_itelement" : None
         , "last_lno" : 0
         , "new_volume_handler" :
                partial(self.new_volume_handler,
                        self._deltatar, self._cwd, True,
                        os.path.dirname(backup_path), decryptor)
         , "decryptor" : decryptor
         }]
    elif index_list is not None:
        # diff mode: one state dict per index, newest first
        for index in index_list:
            is_full = index == index_list[-1]

            decryptor = None
            if self._password is not None:
                decryptor = crypto.Decrypt (password=self._password,
                                            key=self._crypto_key)

            # make paths absolute to avoid cwd problems
            if not os.path.isabs(index):
                index = os.path.normpath(os.path.join(cwd, index))

            s = dict(
                curr_vol_no = None,
                vol_fd = None,
                offset = -1,
                tarobj = None,
                path = index,
                is_full = is_full,
                iterator = None,
                last_itelement = None,
                last_lno = 0,
                new_volume_handler = partial(self.new_volume_handler,
                    self._deltatar, self._cwd, is_full,
                    os.path.dirname(index), decryptor),
                decryptor = decryptor
            )
            self._data.append(s)
    else:
        # tar mode: restore straight from an already opened tar object
        # make paths absolute to avoid cwd problems
        if not os.path.isabs(backup_path):
            backup_path = os.path.normpath(os.path.join(cwd, backup_path))

        # update the new_volume_handler of tar_obj
        tarobj.new_volume_handler = partial(self.new_volume_handler,
            self._deltatar, self._cwd, True, os.path.dirname(backup_path),
            self._deltatar.decryptor)
        s = dict(
            curr_vol_no = None,
            vol_fd = None,
            offset = -1,
            tarobj = tarobj,
            path = backup_path,
            is_full = True,
            iterator = None,
            last_itelement = None,
            last_lno = 0,
            new_volume_handler = tarobj.new_volume_handler,
            decryptor = self._deltatar.decryptor
        )
        self._data.append(s)
def cleanup(self):
    '''
    Closes all open files
    '''
    for data in self._data:
        # release the raw volume file descriptor, if any
        if data['vol_fd']:
            data['vol_fd'].close()
            data['vol_fd'] = None
        # release the tar object wrapping it, if any
        if data['tarobj']:
            data['tarobj'].close()
            data['tarobj'] = None
def delete(self, path):
    '''
    Remove *path* (file, symlink or directory tree) while keeping the
    parent directory's mtime untouched.  Missing paths are a no-op.
    '''
    if not os.path.exists(path):
        return

    # to preserve parent directory mtime, we save it
    parent_dir = os.path.dirname(path) or os.getcwd()
    parent_dir_mtime = int(os.stat(parent_dir).st_mtime)

    if os.path.isdir(path) and not os.path.islink(path):
        shutil.rmtree(path)
    else:
        os.unlink(path)

    # now we restore parent_directory mtime
    os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
def restore(self, itpath, l_no, callback=None):
    '''
    Restore the path from the appropriate backup. Receives the current path
    from the newest (=first) index iterator. itpath must be not null.
    callback is a custom function that gets called for every file.

    NB: This function takes the attribute ``_data`` as input but will only
    ever use its first and, if available, second element. Anything else in
    ``._data[]`` will be ignored.
    '''
    path = itpath['path']

    # Calls the callback function
    if callback:
        callback()

    if path.startswith('delete://'):
        # the file has previously been deleted already in restore_backup in
        # all cases so we just need to finish
        return

    # get data from newest index (_data[0])
    data = self._data[0]
    upath = self._deltatar.unprefixed(path)

    # to preserve parent directory mtime, we save it
    parent_dir = os.path.dirname(upath) or os.getcwd()
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    parent_dir_mtime = int(os.stat(parent_dir).st_mtime)

    # if path is found in the newest index as to be snapshotted, deal with it
    if path.startswith('snapshot://'):
        self.restore_file(itpath, data, path, l_no, upath)

        # now we restore parent_directory mtime
        os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
        return

    # we go from index to index, finding the path in the index, then finding
    # the index with the most recent snapshot of the file being restored
    #
    # Right now we support diff backups, only. No incremental backups.
    # As a result _data[0] is always the diff backup index
    # and _data[1] the full backup index.
    if len(self._data) == 2:
        data = self._data[1]
        d, l_no, dpath = self.find_path_in_index(data, upath)
        if not d:
            self._deltatar.logger.warning('Error restoring file %s from '
                'index, not found in index %s' % (path, data['path']))
            return

        cur_path = d.get('path', '')
        if cur_path.startswith('delete://'):
            self._deltatar.logger.warning(('Strange thing happened, file '
                '%s was listed in first index but deleted by another '
                'one. Path was ignored and untouched.') % path)
            return
        elif cur_path.startswith('snapshot://'):
            # this code path is reached when the file is unchanged
            # in the newest index and therefore of type 'list://'
            self.restore_file(d, data, path, l_no, dpath)

            # now we restore parent_directory mtime
            os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
            return

    # error code path is reached when:
    # a) we have more than two indexes (unsupported atm)
    # b) both indexes contain a list:// entry (logic error)
    # c) we have just one index and it also contains list://
    self._deltatar.logger.warning(('Error restoring file %s from index, '
                                   'snapshot not found in any index') % path)
def find_path_in_index(self, data, upath):
    '''
    Scan the (sorted) index described by *data* for entry *upath*.

    Returns ``(entry, line_number, unprefixed_path)`` on success and
    ``(None, 0, '')`` when the path is not present in this index.
    '''
    # NOTE: we restart the iterator sometimes because the iterator can be
    # walked over completely multiple times, for example if one path if not
    # found in one index and we have to go to the next index.
    it = data['iterator']
    if it is None:
        it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
        d, l_no = it.__next__()
    else:
        # resume from where the previous lookup stopped
        d = data['last_itelement']
        l_no = data['last_lno']

    while True:
        dpath = self._deltatar.unprefixed(d.get('path', ''))
        if upath == dpath:
            data['last_itelement'] = d
            data['last_lno'] = l_no
            return d, l_no, dpath

        up, dp = self._deltatar.compare_indexes(upath, dpath)
        # any time upath should have appeared before current dpath, it means
        # upath is just not in this index and we should stop
        if dp is None:
            data['last_itelement'] = d
            data['last_lno'] = l_no
            return None, 0, ''

        try:
            d, l_no = it.__next__()
        except StopIteration:
            data['last_itelement'] = d
            data['last_lno'] = l_no
            return None, 0, ''
def restore_directories_permissions(self):
    '''
    Restore directory permissions when everything have been restored
    '''
    # owner lookup needs grp/pwd, which are unavailable on some platforms
    # NOTE(review): the original may rely on module-level imports of
    # grp/pwd instead of this local guard -- confirm against upstream.
    try:
        import grp, pwd
    except ImportError:
        grp = pwd = None

    # process deepest directories last so a parent's mtime is not
    # clobbered by fixing up its children afterwards
    self._directories.sort(key=operator.attrgetter('name'))
    self._directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for member in self._directories:
        dirpath = member.name
        try:
            os.chmod(dirpath, member.mode)
            os.utime(dirpath, (member.mtime, member.mtime))
            if self.canchown:
                # We have to be root to do so.
                try:
                    g = grp.getgrnam(member.gname)[2]
                except KeyError:
                    g = member.gid
                try:
                    u = pwd.getpwnam(member.uname)[2]
                except KeyError:
                    u = member.uid
                try:
                    if member.issym and hasattr(os, "lchown"):
                        os.lchown(dirpath, u, g)
                    else:
                        os.chown(dirpath, u, g)
                except EnvironmentError:
                    raise tarfile.ExtractError("could not change owner")
        except tarfile.ExtractError as e:
            self._deltatar.logger.warning('tarfile: %s' % e)
1913 def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
1915 Handles the new volumes
1917 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1918 volume_number, guess_name=True)
1919 volume_path = os.path.join(backup_path, volume_name)
1921 # we convert relative paths into absolute because CWD is changed
1922 if not os.path.isabs(volume_path):
1923 volume_path = os.path.join(cwd, volume_path)
1924 tarobj.open_volume(volume_path, encryption=encryption)
def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
    '''
    Restores a snapshot of a file from a specific backup
    '''
    op_type = file_data.get('type', -1)
    member = file_data.get('member', None)
    ismember = bool(member)

    # when member is set, then we can assume everything is right and we
    # just have to restore the path
    if member is None:
        vol_no = file_data.get('volume', -1)
        # sanity check
        if not isinstance(vol_no, int) or vol_no < 0:
            self._deltatar.logger.warning('unrecognized type to be restored: '
                                '%s, line %d' % (op_type, l_no))

        # setup the volume that needs to be read. only needed when member is
        # not set
        if index_data['curr_vol_no'] != vol_no:
            index_data['curr_vol_no'] = vol_no
            backup_path = os.path.dirname(index_data['path'])
            vol_name = self._deltatar.volume_name_func(backup_path,
                index_data['is_full'], vol_no, guess_name=True)
            vol_path = os.path.join(backup_path, vol_name)
            if index_data['vol_fd']:
                index_data['vol_fd'].close()
            index_data['vol_fd'] = open(vol_path, 'rb')

            # force reopen of the tarobj because of new volume
            if index_data['tarobj']:
                index_data['tarobj'].close()
                index_data['tarobj'] = None

        # seek tarfile if needed
        offset = file_data.get('offset', -1)
        if index_data['tarobj']:
            if self._disaster == tarfile.TOLERANCE_RESCUE:
                # force a seek and reopen
                index_data['tarobj'].close()
                index_data['tarobj'] = None
            else:
                try:
                    member = index_data['tarobj'].__iter__().__next__()
                except tarfile.DecryptionError:
                    pass
                except tarfile.CompressionError:
                    pass

                if not member or member.path != file_data['path']:
                    # force a seek and reopen
                    index_data['tarobj'].close()
                    index_data['tarobj'] = None

        # open the tarfile if needed
        if not index_data['tarobj']:
            index_data['vol_fd'].seek(offset)
            index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
                fileobj=index_data['vol_fd'],
                format=tarfile.GNU_FORMAT,
                concat='#' in self._deltatar.mode,
                encryption=index_data["decryptor"],
                new_volume_handler=index_data['new_volume_handler'],
                save_to_members=False,
                tolerance=self._disaster)

            member = index_data['tarobj'].__iter__().__next__()

    # extract under the unprefixed name
    member.path = unprefixed_path
    member.name = unprefixed_path

    if op_type == 'directory':
        # permissions/ownership for directories are applied at the very
        # end (see restore_directories_permissions); extract with a safe
        # temporary mode on a copy of the member
        self.add_member_dir(member)
        member = copy.copy(member)
        member.mode = 0o0700

        # if it's an existing directory, we then don't need to recreate it
        # just set the right permissions, mtime and that kind of stuff
        if os.path.exists(member.path):
            return

    if not ismember:
        # set current volume number in tarobj, otherwise the extraction of the
        # file might fail when trying to extract a multivolume member
        index_data['tarobj'].volume_number = index_data['curr_vol_no']

    def ignore_symlink (member, *_args):
        self._deltatar.logger.warning("Ignoring symlink %s" % member.name)

    # finally, restore the file
    index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)
def add_member_dir(self, member):
    '''
    Add member dir to be restored at the end
    '''
    if not self.canchown:
        # without chown privileges only name, mode and mtime can be
        # restored later on
        item = DirItem(name=member.name, mode=member.mode,
                       mtime=member.mtime)
    else:
        item = DirItem(name=member.name, mode=member.mode,
                       mtime=member.mtime, gname=member.gname,
                       uname=member.uname, uid=member.uid,
                       gid=member.gid, issym=member.issym())
    self._directories.append(item)
2031 class DirItem(object):
2032 def __init__(self, **kwargs):
2033 for k, v in kwargs.items():