# Copyright (C) 2013, 2014 Intra2net AG
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see
# <http://www.gnu.org/licenses/lgpl-3.0.html>
DELTATAR_HEADER_VERSION    = 1
DELTATAR_PARAMETER_VERSION = 1

from functools import partial
class NullHandler(logging.Handler):
    def emit(self, record):
        pass


logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
# encryption direction
CRYPTO_MODE_ENCRYPT = 0
CRYPTO_MODE_DECRYPT = 1

# The canonical extension for encrypted backup files regardless of the actual
# encryption parameters is “.pdtcrypt”. This is analogous to the encryption
# header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
# since the introduction of the versioned header there is no longer any need
# for encoding encryption parameters in the file extensions (“.aes128” and
# “.aes256”).
PDTCRYPT_EXTENSION = "pdtcrypt"

AUXILIARY_FILE_INDEX = 0
AUXILIARY_FILE_INFO  = 1
class DeltaTar(object):
    """
    Backup class used to create backups
    """

    # list of files to exclude in the backup creation or restore operation. It
    # can contain python regular expressions.

    # list of files to include in the backup creation or restore operation. It
    # can contain python regular expressions. If empty, all files in the source
    # path will be backed up (when creating a backup) or all the files in the
    # backup will be restored (when restoring a backup); but if included_files
    # is set, then only the files included in the list will be processed.

    # custom filter of files to be backed up (or restored). Unused and unset
    # by default. The function receives a file path and must return a boolean.

    # mode in which the delta will be created (when creating a backup) or
    # opened (when restoring). Accepts modes analogous to the tarfile library.

    # used together with aes modes to encrypt and decrypt backups.

    # parameter version to use when encrypting; note that this has no effect
    # on decryption since the required settings are determined from the headers
    crypto_version      = DELTATAR_HEADER_VERSION
    crypto_paramversion = None

    # when encrypting or decrypting, these hold crypto handlers; created before
    # establishing the Tarfile stream iff a password is supplied.

    # python logger object.

    # specifies the index mode in the same format as @param mode, but without
    # the ':', '|' or '#' at the beginning. It doesn't make sense to specify
    # that the index is encrypted if no password is given in the constructor.

    # current time for this backup. Used for file names and file creation checks

    # extra data to be included in the header of the index file when creating a

    # valid tarfile modes and their corresponding default file extension
    __file_extensions_dict = {
        '#gz.pdtcrypt': '.gz',

    # valid index modes and their corresponding default file extension
    __index_extensions_dict = {
        'gz.pdtcrypt': '.gz',

    # valid path prefixes
    __path_prefix_list = [
    def __init__(self, excluded_files=[], included_files=[],
                 filter_func=None, mode="", password=None,
                 crypto_key=None, nacl=None,
                 crypto_version=DELTATAR_HEADER_VERSION,
                 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
                 logger=None, index_mode=None, index_name_func=None,
                 volume_name_func=None):
        """
        Constructor. Configures the diff engine.

        - excluded_files: list of files to exclude in the backup creation or
          restore operation. It can contain python regular expressions.

        - included_files: list of files to include in the backup creation or
          restore operation. It can contain python regular expressions. If
          empty, all files in the source path will be backed up (when creating
          a backup) or all the files in the backup will be restored (when
          restoring a backup); but if included_files is set, then only the
          files included in the list will be processed.

        - filter_func: custom filter of files to be backed up (or restored).
          Unused and unset by default. The function receives a file path and
          must return a boolean.

        - mode: mode in which the delta will be created (when creating a
          backup) or opened (when restoring). Accepts the same modes as the
          tarfile library. Valid modes are:

            ':'      open uncompressed
            ':gz'    open with gzip compression
            ':bz2'   open with bzip2 compression
            '|'      open an uncompressed stream of tar blocks
            '|gz'    open a gzip compressed stream of tar blocks
            '|bz2'   open a bzip2 compressed stream of tar blocks
            '#gz'    open a stream of gzip compressed tar blocks

        - crypto_key: used to encrypt and decrypt backups. Encryption will
          be enabled automatically if a key is supplied. Requires a salt to be

        - nacl: salt that was used to derive the encryption key for embedding
          in the PDTCRYPT header. Not needed when decrypting and when
          encrypting with a password.

        - password: used to encrypt and decrypt backups. Encryption will be
          enabled automatically if a password is supplied.

        - crypto_version: version of the format, determining the kind of PDT

        - crypto_paramversion: optionally request encryption conforming to
          a specific parameter version. Defaults to the standard PDT value
          which as of 2017 is the only one available.

        - logger: python logger object. Optional.

        - index_mode: specifies the index mode in the same format as @param
          mode, but without the ':', '|' or '#' at the beginning. If encryption
          is requested it will extend to the auxiliary (index, info) files as
          well. This is an optional parameter that will automatically mimic
          @param mode by default if not provided. Valid modes are:

            'gz'     open with gzip compression
            'bz2'    open with bzip2 compression

        - index_name_func: function that sets a custom name for the index file.
          This function receives a flag to indicate whether the name will be
          used for a full or diff backup. The backup path will be prepended to

        - volume_name_func: function that defines the name of tar volumes. It
          receives the backup_path, whether it's a full backup, and the volume
          number, and must return the name of the corresponding volume.
          Optional; DeltaTar has default names for tar volumes.
        """
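
        # A minimal usage sketch (paths and password are hypothetical):
        #
        #   deltatar = DeltaTar(mode='#gz', password='secret', logger=None)
        #   deltatar.create_full_backup(source_path='/srv/data',
        #                               backup_path='/srv/backups/full')
        #
        # With mode '#gz' and a password set, volumes are written as
        # encrypted streams of gzip-compressed tar blocks.
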
        if mode not in self.__file_extensions_dict:
            raise Exception('Unrecognized extension mode=[%s] requested for files'
                            % mode)

        self.excluded_files = excluded_files
        self.included_files = included_files
        self.filter_func = filter_func
        self.logger = logging.getLogger('deltatar.DeltaTar')
        if logger:
            self.logger.addHandler(logger)

        if crypto_key is not None:
            self.crypto_key = crypto_key
            self.nacl = nacl # encryption only

        if password is not None:
            self.password = password

        if crypto_version is not None:
            self.crypto_version = crypto_version

        if crypto_paramversion is not None:
            self.crypto_paramversion = crypto_paramversion

        # generate index_mode
        if index_mode is None:
            index_mode = ''
            if 'gz' in mode:
                index_mode = "gz"
            elif 'bz2' in mode:
                index_mode = "bz2"
        elif mode not in self.__index_extensions_dict:
            raise Exception('Unrecognized extension mode=[%s] requested for index'
                            % mode)

        self.index_mode = index_mode
        self.current_time = datetime.datetime.now()

        if index_name_func is not None:
            self.index_name_func = index_name_func

        if volume_name_func is not None:
            self.volume_name_func = volume_name_func
    def pick_extension(self, kind, mode=None):
        """
        Choose the extension depending on a) the kind of file given, b) the
        processing mode, and c) the current encryption settings.
        """
        ret = ""
        if kind == PDT_TYPE_ARCHIVE:
            ret += self.__file_extensions_dict [self.mode]
        elif kind == PDT_TYPE_AUX:
            if mode is None:
                mode = self.__index_extensions_dict [self.index_mode]
            ret += mode

        if self.crypto_key is not None or self.password is not None:
            ret += "." + PDTCRYPT_EXTENSION

        return ret

    def index_name_func(self, is_full): # pylint: disable=method-hidden
        """
        Callback for setting a custom name for the index file. Depending on
        whether *is_full* is set, it will create a suitable name for a full
        or diff backup.
        """
        prefix = "bfull" if is_full else "bdiff"
        date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
        extension = self.pick_extension \
                        (PDT_TYPE_AUX,
                         self.__index_extensions_dict [self.index_mode])

        return "%s-%s.index%s" % (prefix, date_str, extension)
    def volume_name_func(self, backup_path, # pylint: disable=method-hidden
                         is_full, volume_number,
                         guess_name=False):
        """
        Function that defines the name of tar volumes. It receives the
        backup_path, whether it's a full backup, and the volume number, and
        must return the name of the corresponding volume. Optional; DeltaTar
        has default names for tar volumes.

        If guess_name is activated, the file is intended not to be created but
        to be found, and thus the date will be guessed.
        """
        prefix = "bfull" if is_full else "bdiff"
        extension = self.pick_extension \
                        (PDT_TYPE_ARCHIVE,
                         self.__file_extensions_dict [self.mode])

        if not guess_name:
            date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
            return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)

        prefix = prefix + "-"
        postfix = "-%03d%s" % (volume_number + 1, extension)
        for f in os.listdir(backup_path):
            if f.startswith(prefix) and f.endswith(postfix):
                return f
        raise Exception("volume not found")
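
    # Illustrative defaults produced by the two callbacks above (the date is
    # hypothetical): a full backup started on 2014-02-10 at 14:30 with mode
    # '#gz' yields volumes named "bfull-2014-02-10-1430-001.gz",
    # "bfull-2014-02-10-1430-002.gz", ... and an index named
    # "bfull-2014-02-10-1430.index.gz"; when encryption is enabled,
    # ".pdtcrypt" is appended to either extension.
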
    def filter_path(self, path, source_path="", is_dir=None):
        """
        Filters a path, given the source_path, using the filtering properties
        set in the constructor.
        The filtering order is:
        1. included_files (if any)
        2. excluded_files
        3. filter_func (which must return whether the file is accepted or not)
        """
        if len(source_path) > 0:
            # ensure that exactly one '/' at end of dir is also removed
            source_path = source_path.rstrip(os.sep) + os.sep
            path = path[len(source_path):]

        # 1. filter included_files
        match = MATCH
        if len(self.included_files) > 0:
            match = NO_MATCH
            for i in self.included_files:
                # it can be either a regexp or a string
                if isinstance(i, str):
                    # if the string matches, then continue
                    if i == path:
                        match = MATCH
                        break

                    # if the string ends with / it's a directory, and if the
                    # path is contained in it, it is included
                    if i.endswith('/') and path.startswith(i):
                        match = MATCH
                        break

                    # if the string doesn't end with /, add it and do the same
                    # check
                    elif path.startswith(i + '/'):
                        match = MATCH
                        break

                    # check for PARENT_MATCH
                    if is_dir:
                        dir_path = path
                        if not dir_path.endswith('/'):
                            dir_path += '/'

                        if i.startswith(dir_path):
                            match = PARENT_MATCH

                # if it's a regexp, then we just check if it matches
                elif isinstance(i, typing.Pattern):
                    if i.match(path):
                        match = MATCH
                        break
                else:
                    self.logger.warning('Invalid pattern in included_files: %s' % str(i))

            if match == NO_MATCH:
                return NO_MATCH

        # when a directory is in PARENT_MATCH, it doesn't matter if it's
        # excluded. Its subfiles will be excluded, but the directory itself
        # won't.
        if match != PARENT_MATCH:
            for e in self.excluded_files:
                # it can be either a regexp or a string
                if isinstance(e, str):
                    # if the string matches, then exclude
                    if e == path:
                        return NO_MATCH

                    # if the string ends with / it's a directory, and if the
                    # path starts with the directory, then exclude
                    if e.endswith('/') and path.startswith(e):
                        return NO_MATCH

                    # if the string doesn't end with /, do the same check with
                    # the '/' appended
                    elif path.startswith(e + '/'):
                        return NO_MATCH

                # if it's a regexp, then we just check if it matches
                elif isinstance(e, typing.Pattern):
                    if e.match(path):
                        return NO_MATCH
                else:
                    self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))

        if self.filter_func:
            return self.filter_func(path)

        return match
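
    # Illustrative behavior (hypothetical patterns): with
    # included_files=['home/'] the call filter_path('home/user/a.txt', '/src')
    # yields MATCH, since the path falls inside the included directory;
    # filter_path('home', '/src', is_dir=True) yields PARENT_MATCH, because an
    # include pattern lies below that directory; and any path matching an
    # entry of excluded_files comes back as NO_MATCH.
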
    def _recursive_walk_dir(self, source_path, keep_base_dir=False):
        """
        Walk a directory recursively, yielding each file/directory.

        Returns the path of an entity. If ``keep_base_dir`` is set,
        the path returned contains the prefix ``source_path``; otherwise it is
        relative to the prefix.
        """
        source_path = source_path.rstrip(os.sep)
        beginning_size = len(source_path) + 1     # +1 for os.sep

        queue = [source_path]

        while len(queue) > 0:
            cur_path = queue.pop(0)

            try:
                dfd = os.open (cur_path, os.O_DIRECTORY)
            except FileNotFoundError as exn:
                self.logger.warning ("failed to open entity [%s] as directory; "
                                     "no longer present in the file system "
                                     "(error: %s); skipping"
                                     % (cur_path, str (exn)))
                continue

            for filename in sorted(os.listdir(dfd)):
                child = os.path.join(cur_path, filename)
                is_dir = os.path.isdir(child)
                status = self.filter_path(child, source_path, is_dir)
                if status == NO_MATCH:
                    continue
                if not os.access(child, os.R_OK):
                    self.logger.warning('Error accessing possibly locked file %s' % child)
                    continue

                yield child[beginning_size:]

                if is_dir and (status == MATCH or status == PARENT_MATCH):
                    queue.append(child)
    def _stat_dict(self, path):
        """
        Returns a dict with the stat data used to compare files
        """
        stinfo = os.stat(path)
        mode = stinfo.st_mode

        ptype = None
        if stat.S_ISDIR(mode):
            ptype = u'directory'
        elif stat.S_ISREG(mode):
            ptype = u'file'
        elif stat.S_ISLNK(mode):
            ptype = u'link'

        return {
            u'type': ptype,
            u'path': path,
            u'mode': mode,
            u'mtime': int(stinfo.st_mtime),
            u'ctime': int(stinfo.st_ctime),
            u'uid': stinfo.st_uid,
            u'gid': stinfo.st_gid,
            u'inode': stinfo.st_ino,
            u'size': stinfo.st_size
        }
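
    # For a hypothetical regular file the resulting dict looks like:
    #   {u'type': u'file', u'path': 'etc/fstab', u'mode': 33188,
    #    u'mtime': 1392042000, u'ctime': 1392042000, u'uid': 0, u'gid': 0,
    #    u'inode': 131077, u'size': 910}
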
    def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
        """
        Return whether the dicts are equal in the stat keys
        """
        keys = [u'type', u'mode', u'size', u'mtime',
                # not restored: u'inode', u'ctime'
               ]

        # only check gid/uid when running as root, because tarfile can only
        # chown in case of being superuser;
        # also, skip the check in rpmbuild since the sources end up with the
        # uid:gid of the packager while the extracted files are 0:0.
        if hasattr(os, "geteuid") and os.geteuid() == 0 \
                and os.getenv ("RPMBUILD_OPTIONS") is None:
            keys.append(u'gid')
            keys.append(u'uid')

        if (not d1 and d2 is not None) or (d1 is not None and not d2):
            return False

        if self.prefixed(d1.get('path', -1), listsnapshot_equal) != \
           self.prefixed(d2.get('path', -2), listsnapshot_equal):
            return False

        type = d1.get('type', '')
        for key in keys:
            # size doesn't matter for directories
            if type == 'directory' and key == 'size':
                continue
            if d1.get(key, -1) != d2.get(key, -2):
                return False

        return True
    def prefixed(self, path, listsnapshot_equal=False):
        """
        If a path is not prefixed, return it prefixed.
        """
        for prefix in self.__path_prefix_list:
            if path.startswith(prefix):
                if listsnapshot_equal and prefix == u'list://':
                    return u'snapshot://' + path[len(prefix):]
                return path

        return u'snapshot://' + path

    def unprefixed(self, path):
        """
        Remove a path prefix, if any.
        """
        for prefix in self.__path_prefix_list:
            if path.startswith(prefix):
                return path[len(prefix):]

        return path
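
    # Examples: prefixed('etc/fstab') == u'snapshot://etc/fstab', while an
    # already prefixed path is returned unchanged (or, with
    # listsnapshot_equal=True, u'list://' is normalized to u'snapshot://');
    # conversely, unprefixed(u'delete://etc/fstab') == 'etc/fstab'.
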
    def initialize_encryption (self, mode):
        password = self.password
        key = self.crypto_key
        nacl = self.nacl

        if key is None and password is None:
            return
        if mode == CRYPTO_MODE_ENCRYPT:
            return crypto.Encrypt (password=password,
                                   key=key,
                                   nacl=nacl,
                                   version=self.crypto_version,
                                   paramversion=self.crypto_paramversion)
        if mode == CRYPTO_MODE_DECRYPT:
            return crypto.Decrypt (password=password, key=key)

        raise Exception ("invalid encryption mode [%r]" % mode)
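
    # Typical use (see create_full_backup/restore_backup below): the result
    # is stored as self.encryptor or self.decryptor and handed to the
    # Tarfile stream; it is None when neither key nor password is set.
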
    def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
        """
        Given the specified configuration, opens a file for reading or writing,
        inheriting the encryption and compression settings from the backup.
        Returns a file object ready to use.

        :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
                     respectively).
        :param kind: Role of the file, see AUXILIARY_FILE_* constants.
                     Both the info and the auxiliary file have a globally
                     unique, constant counter value.
        """
        if self.index_mode.startswith('gz'):
            comptype = 'gz'
        elif self.index_mode.startswith('bz2'):
            comptype = 'bz2'
        else:
            comptype = 'tar'

        crypto_ctx = None
        enccounter = None
        if mode == 'w':
            crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
        elif mode == 'r':
            crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)

        if crypto_ctx is not None:
            if kind == AUXILIARY_FILE_INFO:
                enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
            elif kind == AUXILIARY_FILE_INDEX:
                enccounter = crypto.AES_GCM_IV_CNT_INDEX
            else:
                raise Exception ("invalid kind of aux file %r" % kind)

        sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
                               bufsize=tarfile.RECORDSIZE, fileobj=None,
                               encryption=crypto_ctx, enccounter=enccounter)

        return sink
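
    # Sketch of the write side as used by the backup methods below:
    #   index_sink = self.open_auxiliary_file(index_path, 'w')
    #   index_sink.write(bytes('...\n', 'UTF-8'))
    #   index_sink.close(close_fileobj=True)
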
    def create_full_backup(self, source_path, backup_path,
                           max_volume_size=None, extra_data=dict()):
        """
        Creates a full backup.

        - source_path: source path to the directory to back up.
        - backup_path: path where the backup will be stored. The backup path
          will be created if it does not exist.
        - max_volume_size: maximum volume size in megabytes. Used to split the
          backup in volumes. Optional (won't split in volumes by default).
        - extra_data: a json-serializable dictionary with information that you
          want to be included in the header of the index file
        """
        # check input
        if not isinstance(source_path, str):
            raise Exception('Source path must be a string')

        if not isinstance(backup_path, str):
            raise Exception('Backup path must be a string')

        if not os.path.exists(source_path) or not os.path.isdir(source_path):
            raise Exception('Source path "%s" does not exist or is not a '\
                            'directory' % source_path)

        if max_volume_size is not None and (not isinstance(max_volume_size, int) or\
                max_volume_size < 1):
            raise Exception('max_volume_size must be a positive integer')
        if max_volume_size is not None:
            max_volume_size = max_volume_size * 1024 * 1024

        if not isinstance(extra_data, dict):
            raise Exception('extra_data must be a dictionary')

        try:
            extra_data_str = json.dumps(extra_data)
        except:
            raise Exception('extra_data is not json-serializable')

        if not os.access(source_path, os.R_OK):
            raise Exception('Source path "%s" is not readable' % source_path)

        # try to create backup path if needed
        os.makedirs(backup_path, exist_ok=True)

        if not os.access(backup_path, os.W_OK):
            raise Exception('Backup path "%s" is not writeable' % backup_path)

        if source_path.endswith('/'):
            source_path = source_path[:-1]

        if backup_path.endswith('/'):
            backup_path = backup_path[:-1]

        # update current time
        self.current_time = datetime.datetime.now()

        if self.mode not in self.__file_extensions_dict:
            raise Exception('Unrecognized extension')
        # setup for encrypting payload
        if self.encryptor is None:
            self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)

        # some initialization
        self.vol_no = 0

        # generate the first volume name
        vol_name = self.volume_name_func(backup_path, True, 0)
        tarfile_path = os.path.join(backup_path, vol_name)

        cwd = os.getcwd()

        index_name = self.index_name_func(True)
        index_path = os.path.join(backup_path, index_name)
        index_sink = self.open_auxiliary_file(index_path, 'w')

        def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
            """
            Handles the new volumes
            """
            volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
            volume_path = os.path.join(backup_path, volume_name)
            deltarobj.vol_no = volume_number

            # we convert relative paths into absolute because CWD is changed
            if not os.path.isabs(volume_path):
                volume_path = os.path.join(cwd, volume_path)

            if tarobj.fileobj is not None:
                tarobj.fileobj.close()

            deltarobj.logger.debug("opening volume %s" % volume_path)

            tarobj.open_volume(volume_path, encryption=encryption)

        # wraps some args from context into the handler
        new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)

        index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))

        s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
        # calculate checksum and write into the stream
        crc = binascii.crc32(s) & 0xFFFFffff
        index_sink.write(s)

        # start creating the tarfile
        tarobj = tarfile.TarFile.open(tarfile_path,
                                      mode='w' + self.mode,
                                      format=tarfile.GNU_FORMAT,
                                      concat='#' in self.mode,
                                      encryption=self.encryptor,
                                      max_volume_size=max_volume_size,
                                      new_volume_handler=new_volume_handler,
                                      save_to_members=False)

        os.chdir(source_path)
        # for each file to be in the backup, do:
        for path in self._recursive_walk_dir('.'):

            # calculate stat dict for current file
            statd = self._stat_dict(path)
            statd['path'] = u'snapshot://' + statd['path']
            statd['volume'] = self.vol_no

            try:
                tarobj.add(path, arcname = statd['path'], recursive=False)
            except FileNotFoundError as exn:
                # file vanished since the call to access(3) above
                self.logger.warning ("object [%s] no longer available in "
                                     "file system (error: %s); skipping"
                                     % (path, str (exn)))
                continue # prevent indexing

            # retrieve file offset
            statd['offset'] = tarobj.get_last_member_offset()
            self.logger.debug("backup %s" % statd['path'])

            # store the stat dict in the index
            s = bytes(json.dumps(statd) + '\n', 'UTF-8')
            crc = binascii.crc32(s, crc) & 0xffffffff
            index_sink.write(s)

        s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
        crc = binascii.crc32(s, crc) & 0xffffffff
        index_sink.write(s)
        s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
        index_sink.write(s)

        index_sink.close (close_fileobj=True)
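
    # The index written above is a sequence of JSON lines framed by markers
    # and closed with a running CRC32, along the lines of:
    #   {"type": "python-delta-tar-index", "version": 1, "backup-type": "full", ...}
    #   {"type": "BEGIN-FILE-LIST"}
    #   ... one stat dict per backed-up object, with "path", "volume", "offset" ...
    #   {"type": "END-FILE-LIST"}
    #   {"type": "file-list-checksum", "checksum": <crc32 of all list lines>}
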
    def create_diff_backup(self, source_path, backup_path, previous_index_path,
                           max_volume_size=None, extra_data=dict()):
        """
        Creates a diff backup.

        - source_path: source path to the directory to back up.
        - backup_path: path where the backup will be stored. The backup path
          will be created if it does not exist.
        - previous_index_path: index of the previous backup, needed to know
          which files changed since then.
        - max_volume_size: maximum volume size in megabytes (MB). Used to split
          the backup in volumes. Optional (won't split in volumes by default).

        NOTE: the previous index is assumed to follow exactly the same format
        as the index_mode set up in the constructor.
        """
        # check/sanitize input
        if not isinstance(source_path, str):
            raise Exception('Source path must be a string')

        if not isinstance(backup_path, str):
            raise Exception('Backup path must be a string')

        if not os.path.exists(source_path) or not os.path.isdir(source_path):
            raise Exception('Source path "%s" does not exist or is not a '\
                            'directory' % source_path)

        if not isinstance(extra_data, dict):
            raise Exception('extra_data must be a dictionary')

        try:
            extra_data_str = json.dumps(extra_data)
        except:
            raise Exception('extra_data is not json-serializable')

        if not os.access(source_path, os.R_OK):
            raise Exception('Source path "%s" is not readable' % source_path)

        if max_volume_size is not None and (not isinstance(max_volume_size, int) or\
                max_volume_size < 1):
            raise Exception('max_volume_size must be a positive integer')
        if max_volume_size is not None:
            max_volume_size = max_volume_size * 1024 * 1024

        if not isinstance(previous_index_path, str):
            raise Exception('previous_index_path must be a string')

        if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
            raise Exception('Index path "%s" does not exist or is not a '\
                            'file' % previous_index_path)

        if not os.access(previous_index_path, os.R_OK):
            raise Exception('Index path "%s" is not readable' % previous_index_path)
        # try to create backup path if needed
        os.makedirs(backup_path, exist_ok=True)

        if not os.access(backup_path, os.W_OK):
            raise Exception('Backup path "%s" is not writeable' % backup_path)

        if source_path.endswith('/'):
            source_path = source_path[:-1]

        if backup_path.endswith('/'):
            backup_path = backup_path[:-1]

        # update current time
        self.current_time = datetime.datetime.now()

        if self.mode not in self.__file_extensions_dict:
            raise Exception('Unrecognized extension')

        # setup for encrypting payload
        if self.encryptor is None:
            self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)

        # some initialization
        self.vol_no = 0

        # generate the first volume name
        vol_name = self.volume_name_func(backup_path, is_full=False,
                                         volume_number=0)
        tarfile_path = os.path.join(backup_path, vol_name)

        cwd = os.getcwd()

        index_name = self.index_name_func(is_full=False)
        index_path = os.path.join(backup_path, index_name)
        index_sink = self.open_auxiliary_file(index_path, 'w')

        def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
            """
            Handles the new volumes
            """
            volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
                                                     volume_number=volume_number)
            volume_path = os.path.join(backup_path, volume_name)
            deltarobj.vol_no = volume_number

            # we convert relative paths into absolute because CWD is changed
            if not os.path.isabs(volume_path):
                volume_path = os.path.join(cwd, volume_path)

            deltarobj.logger.debug("opening volume %s" % volume_path)
            tarobj.open_volume(volume_path)

        # wraps some args from context into the handler
        new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)

        index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))

        s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
        # calculate checksum and write into the stream
        crc = binascii.crc32(s) & 0xFFFFffff
        index_sink.write(s)

        # start creating the tarfile
        tarobj = tarfile.TarFile.open(tarfile_path,
                                      mode='w' + self.mode,
                                      format=tarfile.GNU_FORMAT,
                                      concat='#' in self.mode,
                                      encryption=self.encryptor,
                                      max_volume_size=max_volume_size,
                                      new_volume_handler=new_volume_handler,
                                      save_to_members=False)

        # create the iterators, first the previous index iterator, then the
        # source path directory iterator and collate and iterate them
        if not os.path.isabs(previous_index_path):
            previous_index_path = os.path.join(cwd, previous_index_path)
        index_it = self.iterate_index_path(previous_index_path)

        os.chdir(source_path)
        dir_it = self._recursive_walk_dir('.')
        dir_path_it = self.jsonize_path_iterator(dir_it)
        # for each file to be in the backup, do:
        for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
            action = None
            # if file is not in the index, it means it's a new file, so we have
            # to take a snapshot
            if not ipath:
                action = 'snapshot'
            # if the file is not in the directory iterator, it means that it has
            # been deleted, so we need to mark it as such
            elif not dpath:
                action = 'delete'
            # if the file is in both iterators, it means it might have either
            # not changed (in which case we will just list it in our index but
            # it will not be included in the tar file), or it might have
            # changed, in which case we will snapshot it.
            elif ipath and dpath:
                if self._equal_stat_dicts(ipath, dpath):
                    action = 'list'
                else:
                    action = 'snapshot'
            # TODO: when creating chained backups (i.e. diffing from another
            # diff), we will need to detect the type of action in the previous
            # index, because if it was delete and dpath is None, we should
            # do nothing

            if action == 'snapshot':
                # calculate stat dict for current file
                stat = dpath.copy()
                stat['path'] = "snapshot://" + dpath['path']
                stat['volume'] = self.vol_no

                self.logger.debug("[STORE] %s" % dpath['path'])

                try:
                    tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
                    # retrieve file offset
                    stat['offset'] = tarobj.get_last_member_offset()
                except FileNotFoundError as exn:
                    # file vanished since the call to access(3) above
                    self.logger.warning ("object [%s] no longer available in "
                                         "file system (error: %s); skipping"
                                         % (dpath ["path"], str (exn)))
                    stat = None # prevent indexing

            elif action == 'delete':
                path = self.unprefixed(ipath['path'])
                stat = {
                    u'path': u'delete://' + path,
                    u'type': ipath['type']
                }
                self.logger.debug("[DELETE] %s" % path)

                # mark it as deleted in the backup
                tarobj.add("/dev/null", arcname=stat['path'])
            elif action == 'list':
                stat = dpath.copy()
                path = self.unprefixed(ipath['path'])
                stat['path'] = u'list://' + path
                # unchanged files do not enter in the backup, only in the index
                self.logger.debug("[UNCHANGED] %s" % path)
            else:
                # should not happen
                self.logger.warning('unknown action in create_diff_backup: {0}'
                                    .format(action))
                stat = None

            if stat:
                # store the stat dict in the index
                s = bytes(json.dumps(stat) + '\n', 'UTF-8')
                crc = binascii.crc32(s, crc) & 0xffffffff
                index_sink.write(s)

        s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
        crc = binascii.crc32(s, crc) & 0xffffffff
        index_sink.write(s)
        s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
        index_sink.write(s)
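
        # Every entry written above carries one of three path prefixes:
        # "snapshot://" for objects stored in this volume set, "delete://"
        # for objects removed since the previous backup, and "list://" for
        # unchanged objects recorded in the index only.
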
    def iterate_index_path(self, index_path):
        """
        Returns an index iterator. Internally, it uses a classic iterator class.
        We do that instead of just yielding so that the iterator object can have
        an additional function to close the file descriptor that is opened in
        """
        class IndexPathIterator(object):
            def __init__(self, delta_tar, index_path):
                self.delta_tar = delta_tar
                self.index_path = index_path
                self.f = None
                self.extra_data = dict()

            def __enter__(self):
                """
                Allows this iterator to be used with the "with" statement
                """
                self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
                # check index header
                j, l_no = self.delta_tar._parse_json_line(self.f, 0)
                if j.get("type", '') != 'python-delta-tar-index' or\
                        j.get('version', -1) != 1:
                    raise Exception("invalid index file format: %s" % json.dumps(j))

                self.extra_data = j.get('extra_data', dict())

                # find BEGIN-FILE-LIST, ignore other headers
                while True:
                    j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
                    if j.get('type', '') == 'BEGIN-FILE-LIST':
                        break

                return self

            def __exit__(self, type, value, tb):
                """
                Allows this iterator to be used with the "with" statement
                """

            def __next__(self):
                # read each file in the index and process it to do the restore
                try:
                    j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
                except Exception as e:
                    if self.f:
                        self.f.close()
                    raise e

                op_type = j.get('type', '')

                # when we detect the end of the list, break the loop
                if op_type == 'END-FILE-LIST':
                    raise StopIteration

                if op_type not in ['directory', 'file', 'link']:
                    self.delta_tar.logger.warning('unrecognized type to be '
                        'restored: %s, line %d' % (op_type, l_no))
                    return self.__next__()

                return j, l_no

        return IndexPathIterator(self, index_path)
    def iterate_tar_path(self, tar_path, new_volume_handler=None):
        """
        Returns a tar iterator that iterates jsonized member items that contain
        an additional "member" field, used by RestoreHelper.
        """
        class TarPathIterator(object):
            def __init__(self, delta_tar, tar_path, new_volume_handler=None):
                self.delta_tar = delta_tar
                self.tar_path = tar_path
                self.tar_obj = None
                self.last_member = None
                self.new_volume_handler = new_volume_handler

            def close(self):
                if self.tar_obj:
                    self.tar_obj.close()

            def __enter__(self):
                """
                Allows this iterator to be used with the "with" statement
                """
                if self.tar_obj is None:
                    decryptor = None
                    if self.delta_tar.password is not None:
                        decryptor = crypto.Decrypt \
                            (password=self.delta_tar.password,
                             key=self.delta_tar.crypto_key)
                    self.tar_obj = tarfile.TarFile.open(self.tar_path,
                        mode='r' + self.delta_tar.mode,
                        format=tarfile.GNU_FORMAT,
                        concat='#' in self.delta_tar.mode,
                        encryption=decryptor,
                        new_volume_handler=self.new_volume_handler,
                        save_to_members=False)
                return self

            def __exit__(self, type, value, tb):
                """
                Allows this iterator to be used with the "with" statement
                """
                self.tar_obj.close()

            def __next__(self):
                """
                Read each member and return it as a stat dict
                """
                tarinfo = self.tar_obj.__iter__().__next__()
                # NOTE: here we compare if tarinfo.path is the same as before
                # instead of comparing the tarinfo object itself because the
                # object itself might change for multivol tarinfos
                if tarinfo is None or (self.last_member is not None and\
                        self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
                    raise StopIteration

                self.last_member = tarinfo

                ptype = ''
                if tarinfo.isfile():
                    ptype = u'file'
                elif tarinfo.isdir():
                    ptype = u'directory'
                elif tarinfo.islnk() or tarinfo.issym():
                    ptype = u'link'

                return {
                    u'type': ptype,
                    u'path': tarinfo.path,
                    u'mode': tarinfo.mode,
                    u'mtime': tarinfo.mtime,
                    u'ctime': -1, # cannot restore
                    u'uid': tarinfo.uid,
                    u'gid': tarinfo.gid,
                    u'inode': -1, # cannot restore
                    u'size': tarinfo.size,
                    u'member': tarinfo
                }, 0

        return TarPathIterator(self, tar_path, new_volume_handler)
    def jsonize_path_iterator(self, iter, strip=0):
        """
        Converts the yielded items of an iterator into json path lines.

        strip: Strip the smallest prefix containing num leading slashes from
        """
        while True:
            try:
                path = iter.__next__()
                if strip == 0:
                    yield self._stat_dict(path), 0
                else:
                    st = self._stat_dict(path)
                    st['path'] = "/".join(path.split("/")[strip:])
                    yield st, 0
            except StopIteration:
                break
    def iterate_disaster_index (self, index):
        """
        Mimic the behavior of the other object iterators, just with the inputs
        supplied directly as *index*.
        """
        class RawIndexIterator(object):
            def __init__(self, delta_tar, index):
                self.delta_tar = delta_tar
                self.index = index

            def __enter__(self):
                """
                Allows this iterator to be used with the "with" statement
                """
                self.iter = self.index.__iter__ ()
                return self

            def __exit__(self, type, value, tb):
                """
                Allows this iterator to be used with the "with" statement
                """

            def __next__(self):
                idxent = self.iter.__next__ ()
                return idxent, 0

        return RawIndexIterator(self, index)
    def collate_iterators(self, it1, it2):
        """
        Collate two iterators, so that it returns pairs of the items of each
        iterator (if the items are the same), or (None, elem2) or (elem1, None)
        when there's no match for the items in the other iterator.

        It assumes that the items in both lists are ordered in the same way.
        """
        l_no = 0
        elem1, elem2 = None, None
        while True:
            if not elem1:
                try:
                    elem1, l_no = it1.__next__()
                except StopIteration:
                    if elem2:
                        yield (None, elem2, l_no)
                    for elem2 in it2:
                        if isinstance(elem2, tuple):
                            elem2 = elem2[0]
                        yield (None, elem2, l_no)
                    break
            if not elem2:
                try:
                    elem2 = it2.__next__()
                    if isinstance(elem2, tuple):
                        elem2 = elem2[0]
                except StopIteration:
                    if elem1:
                        yield (elem1, None, l_no)
                    for elem1, l_no in it1:
                        yield (elem1, None, l_no)
                    break

            index1 = self.unprefixed(elem1['path'])
            index2 = self.unprefixed(elem2['path'])
            i1, i2 = self.compare_indexes(index1, index2)

            yield1 = yield2 = None
            if i1 is not None:
                yield1 = elem1
                elem1 = None
            if i2 is not None:
                yield2 = elem2
                elem2 = None
            yield (yield1, yield2, l_no)
    def compare_indexes(self, index1, index2):
        """
        Compare iterator indexes and return a tuple in the following form:
        if index1 < index2, returns (index1, None)
        if index1 == index2 returns (index1, index2)
        else: returns (None, index2)
        """
        l1 = index1.split('/')
        l2 = index2.split('/')
        length = len(l2) - len(l1)

        if length > 0:
            return (index1, None)
        elif length < 0:
            return (None, index2)

        for i1, i2 in zip(l1, l2):
            if i1 < i2:
                return (index1, None)
            elif i1 > i2:
                return (None, index2)

        return (index1, index2)
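
    # Worked examples: compare_indexes('a/b', 'a/c') == ('a/b', None) since
    # 'b' sorts before 'c'; compare_indexes('a', 'a/b') == ('a', None)
    # because paths with fewer components sort first; and
    # compare_indexes('a/b', 'a/b') == ('a/b', 'a/b').
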
    def list_backup(self, backup_tar_path, list_func=None):
        if not isinstance(backup_tar_path, str):
            raise Exception('Backup tar path must be a string')

        if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
            raise Exception('Source path "%s" does not exist or is not a '\
                            'file' % backup_tar_path)

        if not os.access(backup_tar_path, os.R_OK):
            raise Exception('Source path "%s" is not readable' % backup_tar_path)

        cwd = os.getcwd()

        def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
            """
            Handles the new volumes
            """
            volume_name = deltarobj.volume_name_func(backup_path, True,
                                                     volume_number, guess_name=True)
            volume_path = os.path.join(backup_path, volume_name)

            # we convert relative paths into absolute because CWD is changed
            if not os.path.isabs(volume_path):
                volume_path = os.path.join(cwd, volume_path)
            tarobj.open_volume(volume_path, encryption=encryption)

        if self.decryptor is None:
            self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)

        backup_path = os.path.dirname(backup_tar_path)
        if not os.path.isabs(backup_path):
            backup_path = os.path.join(cwd, backup_path)
        new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)

        tarobj = tarfile.TarFile.open(backup_tar_path,
                                      mode='r' + self.mode,
                                      format=tarfile.GNU_FORMAT,
                                      concat='#' in self.mode,
                                      encryption=self.decryptor,
                                      new_volume_handler=new_volume_handler,
                                      save_to_members=False)

        def filter(cls, list_func, tarinfo):
            if list_func is None:
                self.logger.info(tarinfo.path)
            else:
                list_func(tarinfo)
            return tarinfo
        filter = partial(filter, self, list_func)

        tarobj.extractall(filter=filter, unlink=True)
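
    # Hypothetical call: deltatar.list_backup('/backups/bfull-...-001.gz')
    # logs every member path through self.logger; supplying
    # list_func=lambda tarinfo: print(tarinfo.path) overrides that behavior
    # per member.
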
    def restore_backup(self, target_path, backup_indexes_paths=[],
                       backup_tar_path=None, restore_callback=None,
                       disaster=tarfile.TOLERANCE_STRICT, backup_index=None):
        """
        Restores a backup.

        - target_path: path to restore.
        - backup_indexes_paths: path to backup indexes, in descending date order.
          The indexes indicate the location of their respective backup volumes,
          and multiple indexes are needed to be able to restore diff backups.
          Note that this is an optional parameter: if not supplied, it will
          try to restore directly from backup_tar_path.
        - backup_tar_path: path to the backup tar file. Used as an alternative
          to backup_indexes_paths to restore directly from a tar file without
          using any file index. If it's a multivol tarfile, volume_name_func
        - restore_callback: callback function to be called during restore.
          This is passed to the helper and gets called for every file.

        NOTE: If you want to use an index to restore a backup, this function
        only supports doing so when the tarfile mode is either uncompressed or
        uses concat compress mode, because otherwise it would be very slow.

        NOTE: Indices are assumed to follow the same format as the index_mode
        specified in the constructor.

        Returns the list of files that could not be restored, if there were
        """
        # check/sanitize input
        if not isinstance(target_path, str):
            raise Exception('Target path must be a string')

        if not backup_indexes_paths and backup_tar_path is None:
            raise Exception("You have to either provide index paths or a tar path")

        if isinstance (backup_index, list):
            mode = "disaster"
        elif len(backup_indexes_paths) == 0:
            mode = "tar"

            if not isinstance(backup_tar_path, str):
                raise Exception('Backup tar path must be a string')

            if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
                raise Exception('Source path "%s" does not exist or is not a '\
                                'file' % backup_tar_path)

            if not os.access(backup_tar_path, os.R_OK):
                raise Exception('Source path "%s" is not readable' % backup_tar_path)
        else:
            mode = "diff"

            if not isinstance(backup_indexes_paths, list):
                raise Exception('backup_indexes_paths must be a list')

            if self.mode.startswith(':') or self.mode.startswith('|'):
                raise Exception('Restore only supports either uncompressed tars'
                    ' or concat compression when restoring from an index, and '
                    ' the open mode you provided is "%s"' % self.mode)

            for index in backup_indexes_paths:
                if not isinstance(index, str):
                    raise Exception('indices must be strings')

                if not os.path.exists(index) or not os.path.isfile(index):
                    raise Exception('Index path "%s" does not exist or is not a '\
                                    'file' % index)

                if not os.access(index, os.R_OK):
                    raise Exception('Index path "%s" is not readable' % index)
        # try to create backup path if needed
        os.makedirs(target_path, exist_ok=True)

        # make backup_tar_path absolute so that iterate_tar_path works fine
        if backup_tar_path and not os.path.isabs(backup_tar_path):
            backup_tar_path = os.path.abspath(backup_tar_path)

        cwd = os.getcwd()
        os.chdir(target_path)

        # setup for decrypting payload
        if self.decryptor is None:
            self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)

        if mode == "tar":
            index_it = self.iterate_tar_path(backup_tar_path)
            helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
                                   tarobj=index_it.tar_obj)
        elif mode == "diff":
            helper = RestoreHelper(self, cwd, backup_indexes_paths,
                                   disaster=disaster)
            try:
                # get iterator from newest index at _data[0]
                index1 = helper._data[0]["path"]
                index_it = self.iterate_index_path(index1)
            except tarfile.DecryptionError as exn:
                self.logger.error("failed to decrypt file [%s]: %s; is this an "
                                  "actual encrypted index file?"
                                  % (index1, str (exn)))
                return [(index1, exn)]
            except Exception as exn:
                self.logger.error("failed to read file [%s]: %s; is this an "
                                  "actual index file?" % (index1, str (exn)))
                return [(index1, exn)]
        elif mode == "disaster":
            index_it = self.iterate_disaster_index (backup_index)
            helper = RestoreHelper (self, cwd, backup_path=backup_tar_path,
                                    backup_index=backup_index,
                                    disaster=disaster)

        dir_it = self._recursive_walk_dir('.')
        dir_path_it = self.jsonize_path_iterator(dir_it)

        failed = [] # irrecoverable files

        # for each file to be restored, do:
        for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
            if not ipath:
                upath = dpath['path']
                op_type = dpath['type']
            else:
                upath = self.unprefixed(ipath['path'])
                op_type = ipath['type']

            # filter paths
            if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
                continue

            # if types of the file mismatch, the file needs to be deleted
            if ipath is not None and dpath is not None and\
                    dpath['type'] != ipath['type']:
                helper.delete(upath)

            # if file not found in dpath, we can directly restore from index
            if not dpath:
                # if the file doesn't exist and it needs to be deleted, it
                # means that work is already done
                if ipath['path'].startswith('delete://'):
                    continue
                try:
                    self.logger.debug("restore %s" % ipath['path'])
                    helper.restore(ipath, l_no, restore_callback)
                except Exception as e:
                    iipath = ipath.get ("path", "")
                    self.logger.error("FAILED to restore: {} ({})"
                                      .format(iipath, e))
                    if disaster != tarfile.TOLERANCE_STRICT:
                        failed.append ((iipath, e))
                continue

            # if both files are equal, we have nothing to restore
            if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
                continue

            # we have to restore the file, but first we need to delete the
            # current existing file.
            # we don't delete the file if it's a directory, because it might
            # just have changed mtime, so it's quite inefficient to remove
            # it and create it again.
            if ipath:
                if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
                    helper.delete(upath)
                self.logger.debug("restore %s" % ipath['path'])
                try:
                    helper.restore(ipath, l_no, restore_callback)
                except Exception as e:
                    if disaster == tarfile.TOLERANCE_STRICT:
                        raise
                    failed.append ((ipath.get ("path", ""), e))
                continue

            # if the file is not in the index (so it comes from the target
            # directory) then we have to delete it
            self.logger.debug("delete %s" % upath)
            helper.delete(upath)

        helper.restore_directories_permissions()
        os.chdir(cwd)

        return failed
    def recover_backup(self, target_path, backup_indexes_paths=[],
                       restore_callback=None):
        """
        Walk the index, extracting objects in disaster mode. Bad files are
        reported along with a reason.
        """
        return self.restore_backup(target_path,
                                   backup_indexes_paths=backup_indexes_paths,
                                   disaster=tarfile.TOLERANCE_RECOVER)

    def rescue_backup(self, target_path, backup_tar_path,
                      restore_callback=None):
        """
        More aggressive “unfsck” mode: do not rely on the index data as the
        files may be corrupt; skim files for header-like information and
        attempt to retrieve the data.
        """
        def gen_volume_name (nvol):
            return os.path.join (os.path.dirname (backup_tar_path),
                                 self.volume_name_func (backup_tar_path,
                                                        True,
                                                        nvol))

        backup_index = tarfile.gen_rescue_index (gen_volume_name,
                                                 self.mode,
                                                 password=self.password,
                                                 key=self.crypto_key)

        return self.restore_backup(target_path,
                                   backup_index=backup_index,
                                   backup_tar_path=backup_tar_path,
                                   disaster=tarfile.TOLERANCE_RESCUE)
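
    # The three entry points above differ mainly in tolerance:
    # restore_backup() defaults to tarfile.TOLERANCE_STRICT and fails hard,
    # recover_backup() walks the indexes with TOLERANCE_RECOVER and reports
    # bad files, and rescue_backup() skims the volumes themselves with
    # TOLERANCE_RESCUE, ignoring the index data.
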
    def _parse_json_line(self, f, l_no):
        """
        Read line from file like object and process it as JSON.
        """
        l = f.readline()
        l_no += 1
        try:
            j = json.loads(l.decode('UTF-8'))
        except UnicodeDecodeError as e:
            if tuple (l [0:2]) == tarfile.GZ_MAGIC:
                raise Exception \
                    ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
                     % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
                    from e
            raise Exception \
                ("error parsing line #%d as json: not a text file (%d B: [%s..])"
                 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
                from e
        except ValueError as e:
            raise Exception("error parsing this json line "
                            "(line number %d): %s" % (l_no, l))

        return j, l_no
class RestoreHelper(object):
    """
    Class used to help to restore files from indices
    """
    # holds the dicts of data
    _data = []

    _deltatar = None

    _cwd = None

    # list of directories to be restored. This is done as a last step, see
    # tarfile.extractall for details.
    _directories = []

    _disaster = tarfile.TOLERANCE_STRICT

    def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
                 backup_index=None, tarobj=None,
                 disaster=tarfile.TOLERANCE_STRICT):
        """
        Constructor opens the tars and initializes the data structures.

        Assumptions:
        - Index list must be provided in reverse order (newer first).
        - “newer first” apparently means that if there are n backups
          provided, the last full backup is at index n-1 and the most recent
          diff backup is at index 0.
        - Only the first, the second, and the last elements of
          ``index_list`` are relevant, others will not be accessed.
        - If no ``index_list`` is provided, both ``tarobj`` and
          ``backup_path`` must be passed.
        - If ``index_list`` is provided, the values of ``tarobj`` and
          ``backup_path`` are ignored.
        """
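
        # Hypothetical example: restoring a chain of two diffs on top of a
        # full backup takes index_list=['diff2.index', 'diff1.index',
        # 'full.index'], i.e. the newest diff first and the full backup last.
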
        self._directories = []
        self._deltatar = deltatar
        self._cwd = cwd
        self._password = deltatar.password
        self._crypto_key = deltatar.crypto_key
        self._decryptors = []
        self._disaster = disaster

        if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
            self.canchown = True
        else:
            self.canchown = False
        if isinstance (backup_index, list):
            decryptor = self._deltatar.decryptor
            self._data = \
                [{ "curr_vol_no" : None
                 , "vol_fd" : None
                 , "offset" : -1
                 , "tarobj" : None
                 , "path" : backup_path
                 , "is_full" : True
                 , "iterator" : None
                 , "last_itelement" : None
                 , "last_lno" : 0
                 , "new_volume_handler" :
                       partial(self.new_volume_handler,
                               self._deltatar, self._cwd, True,
                               os.path.dirname(backup_path), decryptor)
                 , "decryptor" : decryptor
                 }]
        elif index_list is not None:
            for index in index_list:
                is_full = index == index_list[-1]

                decryptor = None
                if self._password is not None:
                    decryptor = crypto.Decrypt (password=self._password,
                                                key=self._crypto_key)

                # make paths absolute to avoid cwd problems
                if not os.path.isabs(index):
                    index = os.path.normpath(os.path.join(cwd, index))

                s = dict(
                    curr_vol_no = None,
                    vol_fd = None,
                    offset = -1,
                    tarobj = None,
                    path = index,
                    is_full = is_full,
                    iterator = None,
                    last_itelement = None,
                    last_lno = 0,
                    new_volume_handler = partial(self.new_volume_handler,
                                                 self._deltatar, self._cwd, is_full,
                                                 os.path.dirname(index), decryptor),
                    decryptor = decryptor
                )
                self._data.append(s)
        else:
            # make paths absolute to avoid cwd problems
            if not os.path.isabs(backup_path):
                backup_path = os.path.normpath(os.path.join(cwd, backup_path))

            # update the new_volume_handler of tar_obj
            tarobj.new_volume_handler = partial(self.new_volume_handler,
                self._deltatar, self._cwd, True, os.path.dirname(backup_path),
                self._deltatar.decryptor)
            s = dict(
                curr_vol_no = None,
                vol_fd = None,
                offset = -1,
                tarobj = tarobj,
                path = backup_path,
                is_full = True,
                iterator = None,
                last_itelement = None,
                last_lno = 0,
                new_volume_handler = tarobj.new_volume_handler,
                decryptor = self._deltatar.decryptor
            )
            self._data.append(s)
    def cleanup(self):
        """
        Closes all open files
        """
        for data in self._data:
            if data['vol_fd']:
                data['vol_fd'].close()
                data['vol_fd'] = None
            if data['tarobj']:
                data['tarobj'].close()
                data['tarobj'] = None

    def delete(self, path):
        if not os.path.exists(path):
            return

        # to preserve parent directory mtime, we save it
        parent_dir = os.path.dirname(path) or os.getcwd()
        parent_dir_mtime = int(os.stat(parent_dir).st_mtime)

        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            os.unlink(path)

        # now we restore parent_directory mtime
        os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
    def restore(self, itpath, l_no, callback=None):
        """
        Restore the path from the appropriate backup. Receives the current path
        from the newest (=first) index iterator. itpath must not be None.
        callback is a custom function that gets called for every file.

        NB: This function takes the attribute ``_data`` as input but will only
        ever use its first and, if available, second element. Anything else in
        ``._data[]`` will be ignored.
        """
        path = itpath['path']

        # Calls the callback function
        if callback:
            callback()

        if path.startswith('delete://'):
            # the file has previously been deleted already in restore_backup in
            # all cases so we just need to finish
            return

        # get data from newest index (_data[0])
        data = self._data[0]
        upath = self._deltatar.unprefixed(path)

        # to preserve parent directory mtime, we save it
        parent_dir = os.path.dirname(upath) or os.getcwd()
        os.makedirs(parent_dir, exist_ok=True)
        parent_dir_mtime = int(os.stat(parent_dir).st_mtime)

        # if path is found in the newest index as to be snapshotted, deal with it
        if path.startswith('snapshot://'):
            self.restore_file(itpath, data, path, l_no, upath)

            # now we restore parent_directory mtime
            os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
            return

        # we go from index to index, finding the path in the index, then finding
        # the index with the most recent snapshot of the file being restored
        #
        # Right now we support diff backups, only. No incremental backups.
        # As a result _data[0] is always the diff backup index
        # and _data[1] the full backup index.
        if len(self._data) == 2:
            data = self._data[1]
            d, l_no, dpath = self.find_path_in_index(data, upath)
            if not d:
                self._deltatar.logger.warning('Error restoring file %s from '
                    'index, not found in index %s' % (path, data['path']))
                return

            cur_path = d.get('path', '')
            if cur_path.startswith('delete://'):
                self._deltatar.logger.warning(('Strange thing happened, file '
                    '%s was listed in first index but deleted by another '
                    'one. Path was ignored and untouched.') % path)
                return
            elif cur_path.startswith('snapshot://'):
                # this code path is reached when the file is unchanged
                # in the newest index and therefore of type 'list://'
                self.restore_file(d, data, path, l_no, dpath)

                # now we restore parent_directory mtime
                os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
                return

        # error code path is reached when:
        # a) we have more than two indexes (unsupported atm)
        # b) both indexes contain a list:// entry (logic error)
        # c) we have just one index and it also contains list://
        self._deltatar.logger.warning(('Error restoring file %s from index, '
            'snapshot not found in any index') % path)
    def find_path_in_index(self, data, upath):
        # NOTE: we restart the iterator sometimes because the iterator can be
        # walked over completely multiple times, for example if one path is not
        # found in one index and we have to go to the next index.
        it = data['iterator']
        if it is None:
            it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
            d, l_no = it.__next__()
        else:
            d = data['last_itelement']
            l_no = data['last_lno']

        while True:
            dpath = self._deltatar.unprefixed(d.get('path', ''))
            if upath == dpath:
                data['last_itelement'] = d
                data['last_lno'] = l_no
                return d, l_no, dpath

            up, dp = self._deltatar.compare_indexes(upath, dpath)
            # any time upath should have appeared before current dpath, it means
            # upath is just not in this index and we should stop
            if dp is None:
                data['last_itelement'] = d
                data['last_lno'] = l_no
                return None, 0, ''

            try:
                d, l_no = it.__next__()
            except StopIteration:
                data['last_itelement'] = d
                data['last_lno'] = l_no
                return None, 0, ''
    def restore_directories_permissions(self):
        """
        Restore directory permissions when everything has been restored
        """
        self._directories.sort(key=operator.attrgetter('name'))
        self._directories.reverse()

        # Set correct owner, mtime and filemode on directories.
        for member in self._directories:
            dirpath = member.name
            try:
                os.chmod(dirpath, member.mode)
                os.utime(dirpath, (member.mtime, member.mtime))
                if self.canchown:
                    # We have to be root to do so.
                    try:
                        g = grp.getgrnam(member.gname)[2]
                    except KeyError:
                        g = member.gid
                    try:
                        u = pwd.getpwnam(member.uname)[2]
                    except KeyError:
                        u = member.uid
                    try:
                        if member.issym and hasattr(os, "lchown"):
                            os.lchown(dirpath, u, g)
                        else:
                            os.chown(dirpath, u, g)
                    except EnvironmentError:
                        raise tarfile.ExtractError("could not change owner")
            except tarfile.ExtractError as e:
                self._deltatar.logger.warning('tarfile: %s' % e)
    def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
        """
        Handles the new volumes
        """
        volume_name = deltarobj.volume_name_func(backup_path, is_full,
                                                 volume_number, guess_name=True)
        volume_path = os.path.join(backup_path, volume_name)

        # we convert relative paths into absolute because CWD is changed
        if not os.path.isabs(volume_path):
            volume_path = os.path.join(cwd, volume_path)
        tarobj.open_volume(volume_path, encryption=encryption)
    def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
        """
        Restores a snapshot of a file from a specific backup
        """
        op_type = file_data.get('type', -1)
        member = file_data.get('member', None)
        ismember = bool(member)

        # when member is set, then we can assume everything is right and we
        # just have to restore the path
        if member is None:
            vol_no = file_data.get('volume', -1)
            # sanity check
            if not isinstance(vol_no, int) or vol_no < 0:
                self._deltatar.logger.warning('unrecognized type to be restored: '
                    '%s, line %d' % (op_type, l_no))

            # setup the volume that needs to be read. only needed when member is
            # not set
            if index_data['curr_vol_no'] != vol_no:
                index_data['curr_vol_no'] = vol_no
                backup_path = os.path.dirname(index_data['path'])
                vol_name = self._deltatar.volume_name_func(backup_path,
                    index_data['is_full'], vol_no, guess_name=True)
                vol_path = os.path.join(backup_path, vol_name)
                if index_data['vol_fd']:
                    index_data['vol_fd'].close()
                index_data['vol_fd'] = open(vol_path, 'rb')

                # force reopen of the tarobj because of new volume
                if index_data['tarobj']:
                    index_data['tarobj'].close()
                    index_data['tarobj'] = None

            # seek tarfile if needed
            offset = file_data.get('offset', -1)
            if index_data['tarobj']:
                if self._disaster == tarfile.TOLERANCE_RESCUE:
                    # force a seek and reopen
                    index_data['tarobj'].close()
                    index_data['tarobj'] = None
                else:
                    try:
                        member = index_data['tarobj'].__iter__().__next__()
                    except tarfile.DecryptionError:
                        pass
                    except tarfile.CompressionError:
                        pass

                    if not member or member.path != file_data['path']:
                        # force a seek and reopen
                        index_data['tarobj'].close()
                        index_data['tarobj'] = None

            # open the tarfile if needed
            if not index_data['tarobj']:
                index_data['vol_fd'].seek(offset)
                index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
                    fileobj=index_data['vol_fd'],
                    format=tarfile.GNU_FORMAT,
                    concat='#' in self._deltatar.mode,
                    encryption=index_data["decryptor"],
                    new_volume_handler=index_data['new_volume_handler'],
                    save_to_members=False,
                    tolerance=self._disaster)

                member = index_data['tarobj'].__iter__().__next__()

        member.path = unprefixed_path
        member.name = unprefixed_path

        if op_type == 'directory':
            self.add_member_dir(member)
            member = copy.copy(member)
            member.mode = 0o0700

            # if it's an existing directory, we then don't need to recreate it
            # just set the right permissions, mtime and that kind of stuff
            if os.path.exists(member.path):
                return

        # set current volume number in tarobj, otherwise the extraction of the
        # file might fail when trying to extract a multivolume member
        index_data['tarobj'].volume_number = index_data['curr_vol_no']

        def ignore_symlink (member, *_args):
            self._deltatar.logger.warning("Ignoring symlink %s" % member.name)

        # finally, restore the file
        index_data['tarobj'].extract(member, symlink_cb=ignore_symlink,
                                     unlink=True)
    def add_member_dir(self, member):
        """
        Add member dir to be restored at the end
        """
        if not self.canchown:
            self._directories.append(DirItem(name=member.name, mode=member.mode,
                                             mtime=member.mtime))
        else:
            self._directories.append(DirItem(name=member.name, mode=member.mode,
                mtime=member.mtime, gname=member.gname, uname=member.uname,
                uid=member.uid, gid=member.gid, issym=member.issym()))


class DirItem(object):
    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            setattr(self, k, v)