3 # Copyright (C) 2013, 2014 Intra2net AG
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published
7 # by the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see
17 # <http://www.gnu.org/licenses/lgpl-3.0.html>
19 DELTATAR_HEADER_VERSION = 1
20 DELTATAR_PARAMETER_VERSION = 1
33 from functools import partial
38 class NullHandler(logging.Handler):
39 def emit(self, record):
43 logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
51 # encryption direction
52 CRYPTO_MODE_ENCRYPT = 0
53 CRYPTO_MODE_DECRYPT = 1
55 # The canonical extension for encrypted backup files regardless of the actual
56 # encryption parameters is “.pdtcrypt”. This is analogous to the encryption
57 # header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
58 # Since the introduction of the versioned header there no longer any need
59 # for encoding encryption parameters in the file extensions (“.aes128” and
61 PDTCRYPT_EXTENSION = "pdtcrypt"
65 AUXILIARY_FILE_INDEX = 0
66 AUXILIARY_FILE_INFO = 1
68 class DeltaTar(object):
70 Backup class used to create backups
73 # list of files to exclude in the backup creation or restore operation. It
74 # can contain python regular expressions.
77 # list of files to include in the backup creation or restore operation. It
78 # can contain python regular expressions. If empty, all files in the source
79 # path will be backed up (when creating a backup) or all the files in the
80 # backup will be restored (when restoring a backup), but if included_files
81 # is set then only the files include in the list will be processed.
84 # custom filter of files to be backed up (or restored). Unused and unset
85 # by default. The function receives a file path and must return a boolean.
88 # mode in which the delta will be created (when creating a backup) or
89 # opened (when restoring). Accepts modes analog to the tarfile library.
92 # used together with aes modes to encrypt and decrypt backups.
97 # parameter version to use when encrypting; note that this has no effect
98 # on decryption since the required settings are determined from the headers
99 crypto_version = DELTATAR_HEADER_VERSION
100 crypto_paramversion = None
102 # when encrypting or decrypting, these hold crypto handlers; created before
103 # establishing the Tarfile stream iff a password is supplied.
107 # python logger object.
110 # specifies the index mode in the same format as @param mode, but without
111 # the ':', '|' or '#' at the begining. It doesn't make sense to specify
112 # that the index is encrypted if no password is given in the constructor.
115 # current time for this backup. Used for file names and file creation checks
118 # extra data to included in the header of the index file when creating a
122 # valid tarfile modes and their corresponding default file extension
123 __file_extensions_dict = {
132 '#gz.pdtcrypt': '.gz',
137 # valid index modes and their corresponding default file extension
138 __index_extensions_dict = {
142 'gz.pdtcrypt': '.gz',
146 # valid path prefixes
147 __path_prefix_list = [
153 def __init__(self, excluded_files=[], included_files=[],
154 filter_func=None, mode="", password=None,
155 crypto_key=None, nacl=None,
156 crypto_version=DELTATAR_HEADER_VERSION,
157 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
158 logger=None, index_mode=None, index_name_func=None,
159 volume_name_func=None):
161 Constructor. Configures the diff engine.
164 - excluded_files: list of files to exclude in the backup creation or
165 restore operation. It can contain python regular expressions.
167 - included_files: list of files to include in the backup creation or
168 restore operation. It can contain python regular expressions. If
169 empty, all files in the source path will be backed up (when creating a
170 backup) or all the files in the backup will be restored (when
171 restoring a backup), but if included_files is set then only the files
172 include in the list will be processed.
174 - filter_func: custom filter of files to be backed up (or restored).
175 Unused and unset by default. The function receives a file path and
176 must return a boolean.
178 - mode: mode in which the delta will be created (when creating a backup)
179 or opened (when restoring). Accepts the same modes as the tarfile
180 library. Valid modes are:
183 ':' open uncompressed
184 ':gz' open with gzip compression
185 ':bz2' open with bzip2 compression
186 '|' open an uncompressed stream of tar blocks
187 '|gz' open a gzip compressed stream of tar blocks
188 '|bz2' open a bzip2 compressed stream of tar blocks
189 '#gz' open a stream of gzip compressed tar blocks
191 - crypto_key: used to encrypt and decrypt backups. Encryption will
192 be enabled automatically if a key is supplied. Requires a salt to be
195 - nacl: salt that was used to derive the encryption key for embedding
196 in the PDTCRYPT header. Not needed when decrypting and when
197 encrypting with password.
199 - password: used to encrypt and decrypt backups. Encryption will be
200 enabled automatically if a password is supplied.
202 - crypto_version: version of the format, determining the kind of PDT
205 - crypto_paramversion: optionally request encryption conforming to
206 a specific parameter version. Defaults to the standard PDT value
207 which as of 2017 is the only one available.
209 - logger: python logger object. Optional.
211 - index_mode: specifies the index mode in the same format as @param
212 mode, but without the ':', '|' or '#' at the begining. If encryption
213 is requested it will extend to the auxiliary (index, info) files as
214 well. This is an optional parameter that will automatically mimic
215 @param mode by default if not provided. Valid modes are:
218 'gz' open with gzip compression
219 'bz2' open with bzip2 compression
221 - index_name_func: function that sets a custom name for the index file.
222 This function receives a flag to indicate whether the name will be
223 used for a full or diff backup. The backup path will be prepended to
226 - volume_name_func: function that defines the name of tar volumes. It
227 receives the backup_path, if it's a full backup and the volume number,
228 and must return the name for the corresponding volume name. Optional,
229 DeltaTar has default names for tar volumes.
232 if mode not in self.__file_extensions_dict:
233 raise Exception('Unrecognized extension mode=[%s] requested for files'
236 self.excluded_files = excluded_files
237 self.included_files = included_files
238 self.filter_func = filter_func
239 self.logger = logging.getLogger('deltatar.DeltaTar')
241 self.logger.addHandler(logger)
244 if crypto_key is not None:
245 self.crypto_key = crypto_key
246 self.nacl = nacl # encryption only
248 if password is not None:
249 self.password = password
251 if crypto_version is not None:
252 self.crypto_version = crypto_version
254 if crypto_paramversion is not None:
255 self.crypto_paramversion = crypto_paramversion
257 # generate index_mode
258 if index_mode is None:
264 elif mode not in self.__index_extensions_dict:
265 raise Exception('Unrecognized extension mode=[%s] requested for index'
268 self.index_mode = index_mode
269 self.current_time = datetime.datetime.now()
271 if index_name_func is not None:
272 self.index_name_func = index_name_func
274 if volume_name_func is not None:
275 self.volume_name_func = volume_name_func
277 def pick_extension(self, kind, mode=None):
279 Choose the extension depending on a) the kind of file given, b) the
280 processing mode, and c) the current encryption settings.
283 if kind == PDT_TYPE_ARCHIVE:
286 mode = self.__index_extensions_dict [self.index_mode]
288 if self.crypto_key is not None or self.password is not None:
289 ret += "." + PDTCRYPT_EXTENSION
292 def index_name_func(self, is_full): # pylint: disable=method-hidden
294 Callback for setting a custom name for the index file. Depending on
295 whether *is_full* is set, it will create a suitable name for a full
298 prefix = "bfull" if is_full else "bdiff"
299 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
300 extension = self.pick_extension \
302 self.__index_extensions_dict [self.index_mode])
304 return "%s-%s.index%s" % (prefix, date_str, extension)
306 def volume_name_func(self, backup_path, # pylint: disable=method-hidden
307 is_full, volume_number,
310 function that defines the name of tar volumes. It receives the
311 backup_path, if it's a full backup and the volume number, and must return
312 the name for the corresponding volume name. Optional, DeltaTar has default
313 names for tar volumes.
315 If guess_name is activated, the file is intended not to be created but
316 to be found, and thus the date will be guessed.
318 prefix = "bfull" if is_full else "bdiff"
319 extension = self.pick_extension \
321 self.__file_extensions_dict [self.mode])
324 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
325 return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
327 prefix = prefix + "-"
328 postfix = "-%03d%s" % (volume_number + 1, extension)
329 for f in os.listdir(backup_path):
330 if f.startswith(prefix) and f.endswith(postfix):
332 raise Exception("volume not found")
335 def filter_path(self, path, source_path="", is_dir=None):
337 Filters a path, given the source_path, using the filtering properties
338 set in the constructor.
339 The filtering order is:
340 1. included_files (if any)
342 3. filter_func (which must return whether the file is accepted or not)
345 if len(source_path) > 0:
346 # ensure that exactly one '/' at end of dir is also removed
347 source_path = source_path.rstrip(os.sep) + os.sep
348 path = path[len(source_path):]
350 # 1. filter included_files
352 if len(self.included_files) > 0:
354 for i in self.included_files:
355 # it can be either a regexp or a string
356 if isinstance(i, str):
357 # if the string matches, then continue
362 # if the string ends with / it's a directory, and if the
363 # path is contained in it, it is included
364 if i.endswith('/') and path.startswith(i):
368 # if the string doesn't end with /, add it and do the same
370 elif path.startswith(i + '/'):
374 # check for PARENT_MATCH
377 if not dir_path.endswith('/'):
380 if i.startswith(dir_path):
383 # if it's a reg exp, then we just check if it matches
384 elif isinstance(i, re._pattern_type):
389 self.logger.warning('Invalid pattern in included_files: %s' % str(i))
391 if match == NO_MATCH:
394 # when a directory is in PARENT_MATCH, it doesn't matter if it's
395 # excluded. It's subfiles will be excluded, but the directory itself
397 if match != PARENT_MATCH:
398 for e in self.excluded_files:
399 # it can be either a regexp or a string
400 if isinstance(e, str):
401 # if the string matches, then exclude
405 # if the string ends with / it's a directory, and if the
406 # path starts with the directory, then exclude
407 if e.endswith('/') and path.startswith(e):
410 # if the string doesn't end with /, do the same check with
412 elif path.startswith(e + '/'):
415 # if it's a reg exp, then we just check if it matches
416 elif isinstance(e, re._pattern_type):
420 self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
423 return self.filter_func(path)
427 def _recursive_walk_dir(self, source_path, keep_base_dir=False):
429 Walk a directory recursively, yielding each file/directory
431 Returns the path of an entity. If ``keep_base_dir`` is set,
432 the path returned contains the prefix ``source_path``; otherwise it is
433 relative to the prefix.
436 source_path = source_path.rstrip(os.sep)
441 beginning_size = len(source_path) + 1 # +1 for os.sep
443 queue = [source_path]
446 cur_path = queue.pop(0)
448 dfd = os.open (cur_path, os.O_DIRECTORY)
449 if dfd == -1: # it might have been removed in the meantime
453 for filename in sorted(os.listdir(dfd)):
454 child = os.path.join(cur_path, filename)
455 is_dir = os.path.isdir(child)
456 status = self.filter_path(child, source_path, is_dir)
457 if status == NO_MATCH:
459 if not os.access(child, os.R_OK):
460 self.logger.warning('Error accessing possibly locked file %s' % child)
464 yield child[beginning_size:]
466 if is_dir and (status == MATCH or status == PARENT_MATCH):
471 def _stat_dict(self, path):
473 Returns a dict with the stat data used to compare files
475 stinfo = os.stat(path)
476 mode = stinfo.st_mode
479 if stat.S_ISDIR(mode):
481 elif stat.S_ISREG(mode):
483 elif stat.S_ISLNK(mode):
490 u'mtime': int(stinfo.st_mtime),
491 u'ctime': int(stinfo.st_ctime),
492 u'uid': stinfo.st_uid,
493 u'gid': stinfo.st_gid,
494 u'inode': stinfo.st_ino,
495 u'size': stinfo.st_size
498 def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
500 Return if the dicts are equal in the stat keys
502 keys = [u'type', u'mode',u'size', u'mtime',
503 # not restored: u'inode', u'ctime'
506 # only if user is root, then also check gid/uid. otherwise do not check it,
507 # because tarfile can chown in case of being superuser only
509 # also, skip the check in rpmbuild since the sources end up with the
510 # uid:gid of the packager while the extracted files are 0:0.
511 if hasattr(os, "geteuid") and os.geteuid() == 0 \
512 and os.getenv ("RPMBUILD_OPTIONS") is None:
516 if (not d1 and d2 != None) or (d1 != None and not d2):
519 if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
522 type = d1.get('type', '')
525 # size doesn't matter for directories
526 if type == 'directory' and key == 'size':
528 if d1.get(key, -1) != d2.get(key, -2):
532 def prefixed(self, path, listsnapshot_equal=False):
534 if a path is not prefixed, return it prefixed
536 for prefix in self.__path_prefix_list:
537 if path.startswith(prefix):
538 if listsnapshot_equal and prefix == u'list://':
539 return u'snapshot://' + path[len(prefix):]
541 return u'snapshot://' + path
543 def unprefixed(self, path):
545 remove a path prefix if any
547 for prefix in self.__path_prefix_list:
548 if path.startswith(prefix):
549 return path[len(prefix):]
553 def initialize_encryption (self, mode):
554 password = self.password
555 key = self.crypto_key
558 if key is None and password is None:
560 if mode == CRYPTO_MODE_ENCRYPT:
561 return crypto.Encrypt (password=password,
564 version=self.crypto_version,
565 paramversion=self.crypto_paramversion)
566 if mode == CRYPTO_MODE_DECRYPT:
567 return crypto.Decrypt (password=password, key=key)
569 raise Exception ("invalid encryption mode [%r]" % mode)
572 def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
574 Given the specified configuration, opens a file for reading or writing,
575 inheriting the encryption and compression settings from the backup.
576 Returns a file object ready to use.
578 :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
581 :param kind: Role of the file, see AUXILIARY_FILE_* constants.
582 Both the info and the auxiliary file have a globally
583 unique, constant counter value.
586 if self.index_mode.startswith('gz'):
588 elif self.index_mode.startswith('bz2'):
596 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
598 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
600 if crypto_ctx is not None:
601 if kind == AUXILIARY_FILE_INFO:
602 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
603 elif kind == AUXILIARY_FILE_INDEX:
604 enccounter = crypto.AES_GCM_IV_CNT_INDEX
606 raise Exception ("invalid kind of aux file %r" % kind)
608 sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
609 bufsize=tarfile.RECORDSIZE, fileobj=None,
610 encryption=crypto_ctx, enccounter=enccounter)
615 def create_full_backup(self, source_path, backup_path,
616 max_volume_size=None, extra_data=dict()):
618 Creates a full backup.
621 - source_path: source path to the directory to back up.
622 - backup_path: path where the back up will be stored. Backup path will
623 be created if not existent.
624 - max_volume_size: maximum volume size in megabytes. Used to split the
625 backup in volumes. Optional (won't split in volumes by default).
626 - extra_data: a json-serializable dictionary with information that you
627 want to be included in the header of the index file
630 if not isinstance(source_path, str):
631 raise Exception('Source path must be a string')
633 if not isinstance(backup_path, str):
634 raise Exception('Backup path must be a string')
636 if not os.path.exists(source_path) or not os.path.isdir(source_path):
637 raise Exception('Source path "%s" does not exist or is not a '\
638 'directory' % source_path)
640 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
641 max_volume_size < 1):
642 raise Exception('max_volume_size must be a positive integer')
643 if max_volume_size != None:
644 max_volume_size = max_volume_size*1024*1024
646 if not isinstance(extra_data, dict):
647 raise Exception('extra_data must be a dictionary')
650 extra_data_str = json.dumps(extra_data)
652 raise Exception('extra_data is not json-serializable')
654 if not os.access(source_path, os.R_OK):
655 raise Exception('Source path "%s" is not readable' % source_path)
657 # try to create backup path if needed
658 os.makedirs(backup_path, exist_ok=True)
660 if not os.access(backup_path, os.W_OK):
661 raise Exception('Backup path "%s" is not writeable' % backup_path)
663 if source_path.endswith('/'):
664 source_path = source_path[:-1]
666 if backup_path.endswith('/'):
667 backup_path = backup_path[:-1]
669 # update current time
670 self.current_time = datetime.datetime.now()
672 if self.mode not in self.__file_extensions_dict:
673 raise Exception('Unrecognized extension')
675 # setup for encrypting payload
676 if self.encryptor is None:
677 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
679 # some initialization
682 # generate the first volume name
683 vol_name = self.volume_name_func(backup_path, True, 0)
684 tarfile_path = os.path.join(backup_path, vol_name)
687 index_name = self.index_name_func(True)
688 index_path = os.path.join(backup_path, index_name)
689 index_sink = self.open_auxiliary_file(index_path, 'w')
693 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
695 Handles the new volumes
697 volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
698 volume_path = os.path.join(backup_path, volume_name)
699 deltarobj.vol_no = volume_number
701 # we convert relative paths into absolute because CWD is changed
702 if not os.path.isabs(volume_path):
703 volume_path = os.path.join(cwd, volume_path)
705 if tarobj.fileobj is not None:
706 tarobj.fileobj.close()
708 deltarobj.logger.debug("opening volume %s" % volume_path)
710 tarobj.open_volume(volume_path, encryption=encryption)
712 # wraps some args from context into the handler
713 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
715 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
717 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
718 # calculate checksum and write into the stream
719 crc = binascii.crc32(s) & 0xFFFFffff
722 # start creating the tarfile
723 tarobj = tarfile.TarFile.open(tarfile_path,
724 mode='w' + self.mode,
725 format=tarfile.GNU_FORMAT,
726 concat='#' in self.mode,
727 encryption=self.encryptor,
728 max_volume_size=max_volume_size,
729 new_volume_handler=new_volume_handler,
730 save_to_members=False,
732 os.chdir(source_path)
734 # for each file to be in the backup, do:
735 for path in self._recursive_walk_dir('.'):
736 # calculate stat dict for current file
737 statd = self._stat_dict(path)
738 statd['path'] = u'snapshot://' + statd['path']
739 statd['volume'] = self.vol_no
744 tarobj.add(path, arcname = statd['path'], recursive=False)
745 except FileNotFoundError as exn:
746 # file vanished since the call to access(3) above
747 self.logger.warning ("object [%s] no longer available in "
748 "file system (error: %s); skipping"
750 continue # prevent indexing
752 # retrieve file offset
753 statd['offset'] = tarobj.get_last_member_offset()
754 self.logger.debug("backup %s" % statd['path'])
756 # store the stat dict in the index
757 s = bytes(json.dumps(statd) + '\n', 'UTF-8')
758 crc = binascii.crc32(s, crc) & 0xffffffff
761 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
762 crc = binascii.crc32(s, crc) & 0xffffffff
764 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
769 index_sink.close (close_fileobj=True)
771 def create_diff_backup(self, source_path, backup_path, previous_index_path,
772 max_volume_size=None, extra_data=dict()):
777 - source_path: source path to the directory to back up.
778 - backup_path: path where the back up will be stored. Backup path will
779 be created if not existent.
780 - previous_index_path: index of the previous backup, needed to know
781 which files changed since then.
782 - max_volume_size: maximum volume size in megabytes (MB). Used to split
783 the backup in volumes. Optional (won't split in volumes by default).
785 NOTE: previous index is assumed to follow exactly the same format as
786 the index_mode setup in the constructor.
788 # check/sanitize input
789 if not isinstance(source_path, str):
790 raise Exception('Source path must be a string')
792 if not isinstance(backup_path, str):
793 raise Exception('Backup path must be a string')
795 if not os.path.exists(source_path) or not os.path.isdir(source_path):
796 raise Exception('Source path "%s" does not exist or is not a '\
797 'directory' % source_path)
799 if not isinstance(extra_data, dict):
800 raise Exception('extra_data must be a dictionary')
803 extra_data_str = json.dumps(extra_data)
805 raise Exception('extra_data is not json-serializable')
807 if not os.access(source_path, os.R_OK):
808 raise Exception('Source path "%s" is not readable' % source_path)
810 if max_volume_size != None and (not isinstance(max_volume_size, int) or\
811 max_volume_size < 1):
812 raise Exception('max_volume_size must be a positive integer')
813 if max_volume_size != None:
814 max_volume_size = max_volume_size*1024*1024
816 if not isinstance(previous_index_path, str):
817 raise Exception('previous_index_path must be A string')
819 if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
820 raise Exception('Index path "%s" does not exist or is not a '\
821 'file' % previous_index_path)
823 if not os.access(previous_index_path, os.R_OK):
824 raise Exception('Index path "%s" is not readable' % previous_index_path)
826 # try to create backup path if needed
827 os.makedirs(backup_path, exist_ok=True)
829 if not os.access(backup_path, os.W_OK):
830 raise Exception('Backup path "%s" is not writeable' % backup_path)
832 if source_path.endswith('/'):
833 source_path = source_path[:-1]
835 if backup_path.endswith('/'):
836 backup_path = backup_path[:-1]
838 # update current time
839 self.current_time = datetime.datetime.now()
841 if self.mode not in self.__file_extensions_dict:
842 raise Exception('Unrecognized extension')
844 # setup for encrypting payload
845 if self.encryptor is None:
846 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
848 # some initialization
851 # generate the first volume name
852 vol_name = self.volume_name_func(backup_path, is_full=False,
854 tarfile_path = os.path.join(backup_path, vol_name)
859 index_name = self.index_name_func(is_full=False)
860 index_path = os.path.join(backup_path, index_name)
861 index_sink = self.open_auxiliary_file(index_path, 'w')
863 def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
865 Handles the new volumes
867 volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
868 volume_number=volume_number)
869 volume_path = os.path.join(backup_path, volume_name)
870 deltarobj.vol_no = volume_number
872 # we convert relative paths into absolute because CWD is changed
873 if not os.path.isabs(volume_path):
874 volume_path = os.path.join(cwd, volume_path)
876 deltarobj.logger.debug("opening volume %s" % volume_path)
877 tarobj.open_volume(volume_path)
879 # wraps some args from context into the handler
880 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
882 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
884 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
885 # calculate checksum and write into the stream
886 crc = binascii.crc32(s) & 0xFFFFffff
889 # start creating the tarfile
890 tarobj = tarfile.TarFile.open(tarfile_path,
891 mode='w' + self.mode,
892 format=tarfile.GNU_FORMAT,
893 concat='#' in self.mode,
894 encryption=self.encryptor,
895 max_volume_size=max_volume_size,
896 new_volume_handler=new_volume_handler,
897 save_to_members=False,
901 # create the iterators, first the previous index iterator, then the
902 # source path directory iterator and collate and iterate them
903 if not os.path.isabs(previous_index_path):
904 previous_index_path = os.path.join(cwd, previous_index_path)
905 index_it = self.iterate_index_path(previous_index_path)
907 os.chdir(source_path)
908 dir_it = self._recursive_walk_dir('.')
909 dir_path_it = self.jsonize_path_iterator(dir_it)
917 # for each file to be in the backup, do:
918 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
920 # if file is not in the index, it means it's a new file, so we have
925 # if the file is not in the directory iterator, it means that it has
926 # been deleted, so we need to mark it as such
929 # if the file is in both iterators, it means it might have either
930 # not changed (in which case we will just list it in our index but
931 # it will not be included in the tar file), or it might have
932 # changed, in which case we will snapshot it.
933 elif ipath and dpath:
934 if self._equal_stat_dicts(ipath, dpath):
938 # TODO: when creating chained backups (i.e. diffing from another
939 # diff), we will need to detect the type of action in the previous
940 # index, because if it was delete and dpath is None, we should
943 if action == 'snapshot':
944 # calculate stat dict for current file
946 stat['path'] = "snapshot://" + dpath['path']
947 stat['volume'] = self.vol_no
949 self.logger.debug("[STORE] %s" % dpath['path'])
952 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
953 # retrieve file offset
954 stat['offset'] = tarobj.get_last_member_offset()
955 except FileNotFoundError as exn:
956 # file vanished since the call to access(3) above
957 self.logger.warning ("object [%s] no longer available in "
958 "file system (error: %s); skipping"
959 % (dpath ["path"], str (exn)))
960 stat = None # prevent indexing
962 elif action == 'delete':
963 path = self.unprefixed(ipath['path'])
965 u'path': u'delete://' + path,
966 u'type': ipath['type']
968 self.logger.debug("[DELETE] %s" % path)
970 # mark it as deleted in the backup
971 tarobj.add("/dev/null", arcname=stat['path'])
972 elif action == 'list':
974 path = self.unprefixed(ipath['path'])
975 stat['path'] = u'list://' + path
976 # unchanged files do not enter in the backup, only in the index
977 self.logger.debug("[UNCHANGED] %s" % path)
980 self.logger.warning('unknown action in create_diff_backup: {0}'
985 # store the stat dict in the index
986 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
987 crc = binascii.crc32(s, crc) & 0xffffffff
990 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
991 crc = binascii.crc32(s, crc) & 0xffffffff
993 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
1002 def iterate_index_path(self, index_path):
1004 Returns an index iterator. Internally, it uses a classic iterator class.
1005 We do that instead of just yielding so that the iterator object can have
1006 an additional function to close the file descriptor that is opened in
1010 class IndexPathIterator(object):
1011 def __init__(self, delta_tar, index_path):
1012 self.delta_tar = delta_tar
1013 self.index_path = index_path
1015 self.extra_data = dict()
1025 def __enter__(self):
1027 Allows this iterator to be used with the "with" statement
1030 self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
1031 # check index header
1032 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1033 if j.get("type", '') != 'python-delta-tar-index' or\
1034 j.get('version', -1) != 1:
1035 raise Exception("invalid index file format: %s" % json.dumps(j))
1037 self.extra_data = j.get('extra_data', dict())
1039 # find BEGIN-FILE-LIST, ignore other headers
1041 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1042 if j.get('type', '') == 'BEGIN-FILE-LIST':
1046 def __exit__(self, type, value, tb):
1048 Allows this iterator to be used with the "with" statement
1055 # read each file in the index and process it to do the restore
1059 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1060 except Exception as e:
1065 op_type = j.get('type', '')
1067 # when we detect the end of the list, break the loop
1068 if op_type == 'END-FILE-LIST':
1074 if op_type not in ['directory', 'file', 'link']:
1075 self.delta_tar.logger.warning('unrecognized type to be '
1076 'restored: %s, line %d' % (op_type, l_no))
1078 return self.__next__()
1082 return IndexPathIterator(self, index_path)
1084 def iterate_tar_path(self, tar_path, new_volume_handler=None):
1086 Returns a tar iterator that iterates jsonized member items that contain
1087 an additional "member" field, used by RestoreHelper.
1089 class TarPathIterator(object):
1090 def __init__(self, delta_tar, tar_path, new_volume_handler=None):
1091 self.delta_tar = delta_tar
1092 self.tar_path = tar_path
1094 self.last_member = None
1095 self.new_volume_handler = new_volume_handler
1103 self.tar_obj.close()
1105 def __enter__(self):
1107 Allows this iterator to be used with the "with" statement
1109 if self.tar_obj is None:
1111 if self.delta_tar.password is not None:
1112 decryptor = crypto.Decrypt \
1113 (password=self.delta_tar.password,
1114 key=self.delta_tar.crypto_key)
1115 self.tar_obj = tarfile.TarFile.open(self.tar_path,
1116 mode='r' + self.delta_tar.mode,
1117 format=tarfile.GNU_FORMAT,
1118 concat='#' in self.delta_tar.mode,
1119 encryption=decryptor,
1120 new_volume_handler=self.new_volume_handler,
1121 save_to_members=False,
1125 def __exit__(self, type, value, tb):
1127 Allows this iterator to be used with the "with" statement
1130 self.tar_obj.close()
1135 Read each member and return it as a stat dict
1137 tarinfo = self.tar_obj.__iter__().__next__()
1138 # NOTE: here we compare if tarinfo.path is the same as before
1139 # instead of comparing the tarinfo object itself because the
1140 # object itself might change for multivol tarinfos
1141 if tarinfo is None or (self.last_member is not None and\
1142 self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
1145 self.last_member = tarinfo
1148 if tarinfo.isfile():
1150 elif tarinfo.isdir():
1152 elif tarinfo.islnk() or tarinfo.issym():
1157 u'path': tarinfo.path,
1158 u'mode': tarinfo.mode,
1159 u'mtime': tarinfo.mtime,
1160 u'ctime': -1, # cannot restore
1161 u'uid': tarinfo.uid,
1162 u'gid': tarinfo.gid,
1163 u'inode': -1, # cannot restore
1164 u'size': tarinfo.size,
1168 return TarPathIterator(self, tar_path, new_volume_handler)
1170 def jsonize_path_iterator(self, iter, strip=0):
1172 converts the yielded items of an iterator into json path lines.
1174 strip: Strip the smallest prefix containing num leading slashes from
1179 path = iter.__next__()
1181 yield self._stat_dict(path), 0
1183 st = self._stat_dict(path)
1184 st['path'] = "/".join(path.split("/")[strip:])
1186 except StopIteration:
    def iterate_disaster_index (self, index):
        """
        Mimick the behavior of the other object iterators, just with the inputs
        supplied directly as *index*.

        NOTE(review): this listing is elided — lines of the original source
        are missing at the points marked below.
        """
        class RawIndexIterator(object):
            def __init__(self, delta_tar, index):
                # parent DeltaTar instance (kept for interface symmetry with
                # the other iterators)
                self.delta_tar = delta_tar
                # [elided: presumably stores *index* and further setup]

            def __enter__(self):
                """
                Allows this iterator to be used with the "with" statement
                """
                # materialize an iterator over the raw in-memory index
                self.iter = self.index.__iter__ ()
                # [elided: presumably "return self"]

            def __exit__(self, type, value, tb):
                """
                Allows this iterator to be used with the "with" statement
                """

            # [elided: the "def __next__(self):" line is not visible]
                idxent = self.iter.__next__ ()
                # [elided: the return of the entry; presumably "(idxent, 0)"
                #  to match the (item, line_no) shape of the other
                #  iterators — TODO confirm]

        return RawIndexIterator(self, index)
    def collate_iterators(self, it1, it2):
        """
        Collate two iterators, so that it returns pairs of the items of each
        iterator (if the items are the same), or (None, elem2) or (elem1, None)
        when there's no match for the items in the other iterator.

        It assumes that the items in both lists are ordered in the same way.

        NOTE(review): this listing is elided — the loop and try/except
        scaffolding around the visible lines is missing.
        """
        elem1, elem2 = None, None
        # [elided: main loop header; refill elem1 from it1 when consumed]
                elem1, l_no = it1.__next__()
            except StopIteration:
                # it1 exhausted: flush the pending elem2, then drain it2
                # [elided: guard]
                yield (None, elem2, l_no)
                # [elided: drain loop over it2]
                if isinstance(elem2, tuple):
                    # [elided: unwrap; presumably "elem2 = elem2[0]"]
                yield (None, elem2, l_no)
        # [elided: refill elem2 from it2 when consumed]
                elem2 = it2.__next__()
                if isinstance(elem2, tuple):
                    # [elided: unwrap; presumably "elem2 = elem2[0]"]
            except StopIteration:
                # it2 exhausted: flush the pending elem1, then drain it1
                # [elided: guard]
                yield (elem1, None, l_no)
            for elem1, l_no in it1:
                yield (elem1, None, l_no)

            # both sides hold an element: order them the way the index does
            index1 = self.unprefixed(elem1['path'])
            index2 = self.unprefixed(elem2['path'])
            i1, i2 = self.compare_indexes(index1, index2)

            # whichever side(s) sort first get yielded (and consumed so they
            # are refilled on the next pass); the other side yields None
            yield1 = yield2 = None
            # [elided: assignment of yield1/yield2 and reset of elem1/elem2]
            yield (yield1, yield2, l_no)
1272 def compare_indexes(self, index1, index2):
1274 Compare iterator indexes and return a tuple in the following form:
1275 if index1 < index2, returns (index1, None)
1276 if index1 == index2 returns (index1, index2)
1277 else: returns (None, index2)
1279 l1 = index1.split('/')
1280 l2 = index2.split('/')
1281 length = len(l2) - len(l1)
1284 return (index1, None)
1286 return (None, index2)
1288 for i1, i2 in zip(l1, l2):
1290 return (index1, None)
1292 return (None, index2)
1294 return (index1, index2)
    def list_backup(self, backup_tar_path, list_func=None):
        # List the contents of a backup archive.  For every member either
        # call *list_func* with the tarinfo or, when no callback is given,
        # log the member path at INFO level.
        #
        # NOTE(review): this listing is elided — lines of the original
        # source are missing at the points marked below.
        if not isinstance(backup_tar_path, str):
            raise Exception('Backup tar path must be a string')

        if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
            raise Exception('Source path "%s" does not exist or is not a '\
                            'file' % backup_tar_path)

        if not os.access(backup_tar_path, os.R_OK):
            raise Exception('Source path "%s" is not readable' % backup_tar_path)

        # [elided: presumably "cwd = os.getcwd()" — *cwd* is used below]

        def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
            """
            Handles the new volumes
            """
            volume_name = deltarobj.volume_name_func(backup_path, True,
                volume_number, guess_name=True)
            volume_path = os.path.join(backup_path, volume_name)

            # we convert relative paths into absolute because CWD is changed
            if not os.path.isabs(volume_path):
                volume_path = os.path.join(cwd, volume_path)
            tarobj.open_volume(volume_path, encryption=encryption)

        # lazily create the decryptor once per DeltaTar instance
        if self.decryptor is None:
            self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)

        backup_path = os.path.dirname(backup_tar_path)
        if not os.path.isabs(backup_path):
            backup_path = os.path.join(cwd, backup_path)
        # pre-bind the static arguments of the volume handler
        new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)

        tarobj = tarfile.TarFile.open(backup_tar_path,
            mode='r' + self.mode,
            format=tarfile.GNU_FORMAT,
            # '#' in the mode selects concat-compressed streams
            concat='#' in self.mode,
            encryption=self.decryptor,
            new_volume_handler=new_volume_handler,
            save_to_members=False,
            # [elided: remaining arguments and closing paren]

        def filter(cls, list_func, tarinfo):
            # NOTE(review): *cls* is bound to the DeltaTar instance via the
            # partial() below, so it plays the role of "self" here.
            if list_func is None:
                self.logger.info(tarinfo.path)
            # [elided: remainder — presumably an else branch calling
            #  list_func(tarinfo) and a return value]
        filter = partial(filter, self, list_func)

        # walk all members through the filter without writing anything out
        tarobj.extractall(filter=filter)
        # [elided: presumably "tarobj.close()"]
    def restore_backup(self, target_path, backup_indexes_paths=[],
                       backup_tar_path=None, restore_callback=None,
                       disaster=tarfile.TOLERANCE_STRICT, backup_index=None):
        """
        Restores a backup into *target_path*.

        - target_path: path to restore.
        - backup_indexes_paths: path to backup indexes, in descending date order.
          The indexes indicate the location of their respective backup volumes,
          and multiple indexes are needed to be able to restore diff backups.
          Note that this is an optional parameter: if not suplied, it will
          try to restore directly from backup_tar_path.
        - backup_tar_path: path to the backup tar file. Used as an alternative
          to backup_indexes_paths to restore directly from a tar file without
          using any file index. If it's a multivol tarfile, volume_name_func
          [elided: continuation of this bullet is missing]
        - restore_callback: callback function to be called during restore.
          This is passed to the helper and gets called for every file.

        NOTE: If you want to use an index to restore a backup, this function
        only supports to do so when the tarfile mode is either uncompressed or
        uses concat compress mode, because otherwise it would be very slow.

        NOTE: Indices are assumed to follow the same format as the index_mode
        specified in the constructor.

        Returns the list of files that could not be restored, if there were
        [elided: continuation missing]

        NOTE(review): this listing is elided — lines of the original source
        are missing at the points marked below.
        """
        # check/sanitize input
        if not isinstance(target_path, str):
            raise Exception('Target path must be a string')

        # NOTE(review): the operands look swapped — this reads as if it were
        # meant to be "backup_indexes_paths == [] and backup_tar_path is
        # None"; as written, "backup_tar_path == []" can never hold for a
        # path argument.  Verify against the original source.
        if backup_indexes_paths is None and backup_tar_path == []:
            raise Exception("You have to either provide index paths or a tar path")

        # choose restore mode from the supplied arguments
        if isinstance (backup_index, list) is True:
            # [elided: presumably mode = "disaster"]
        elif len(backup_indexes_paths) == 0:
            # [elided: presumably mode = "tar"; tar-path validation follows]
            if not isinstance(backup_tar_path, str):
                raise Exception('Backup tar path must be a string')

            if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
                raise Exception('Source path "%s" does not exist or is not a '\
                                'file' % backup_tar_path)

            if not os.access(backup_tar_path, os.R_OK):
                raise Exception('Source path "%s" is not readable' % backup_tar_path)
        # [elided: else branch header — index ("diff") mode validation]
            if not isinstance(backup_indexes_paths, list):
                raise Exception('backup_indexes_paths must be a list')

            if self.mode.startswith(':') or self.mode.startswith('|'):
                raise Exception('Restore only supports either uncompressed tars'
                    ' or concat compression when restoring from an index, and '
                    ' the open mode you provided is "%s"' % self.mode)

            for index in backup_indexes_paths:
                if not isinstance(index, str):
                    raise Exception('indices must be strings')

                if not os.path.exists(index) or not os.path.isfile(index):
                    raise Exception('Index path "%s" does not exist or is not a '\
                        # [elided: continuation of the message and argument]

                if not os.access(index, os.R_OK):
                    raise Exception('Index path "%s" is not readable' % index)

        # try to create backup path if needed
        os.makedirs(target_path, exist_ok=True)

        # make backup_tar_path absolute so that iterate_tar_path works fine
        if backup_tar_path and not os.path.isabs(backup_tar_path):
            backup_tar_path = os.path.abspath(backup_tar_path)

        # [elided: presumably "cwd = os.getcwd()" — *cwd* is used below]
        os.chdir(target_path)

        # setup for decrypting payload
        if self.decryptor is None:
            self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)

        # [elided: 'if mode == "tar":' branch header]
            index_it = self.iterate_tar_path(backup_tar_path)
            helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
                                   tarobj=index_it.tar_obj)
        elif mode == "diff":
            helper = RestoreHelper(self, cwd, backup_indexes_paths,
                                   # [elided: remaining arguments]
            # [elided: "try:" header]
                # get iterator from newest index at _data[0]
                index1 = helper._data[0]["path"]
                index_it = self.iterate_index_path(index1)
            except tarfile.DecryptionError as exn:
                self.logger.error("failed to decrypt file [%s]: %s; is this an "
                                  "actual encrypted index file?"
                                  % (index1, str (exn)))
                return [(index1, exn)]
            except Exception as exn:
                # e.g. an index that is still compressed or otherwise unreadable
                self.logger.error("failed to read file [%s]: %s; is this an "
                                  "actual index file?" % (index1, str (exn)))
                return [(index1, exn)]
        elif mode == "disaster":
            index_it = self.iterate_disaster_index (backup_index)
            helper = RestoreHelper (self, cwd, backup_path=backup_tar_path,
                                    backup_index=backup_index,
                                    # [elided: remaining arguments]

        # iterator over the current contents of the target directory
        dir_it = self._recursive_walk_dir('.')
        dir_path_it = self.jsonize_path_iterator(dir_it)

        failed = [] # irrecoverable files

        # for each file to be restored, do:
        for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
            # [elided: branch headers selecting which side supplied the entry]
                upath = dpath['path']
                op_type = dpath['type']
            # [elided]
                upath = self.unprefixed(ipath['path'])
                op_type = ipath['type']

            # filter paths not selected for restore
            if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
                # [elided: presumably "continue"]

            # if types of the file mismatch, the file needs to be deleted
            # [elided: continuation of this comment]
            if ipath is not None and dpath is not None and\
               dpath['type'] != ipath['type']:
                helper.delete(upath)

            # if file not found in dpath, we can directly restore from index
            # [elided: branch header, presumably "if dpath is None:"]
                # if the file doesn't exist and it needs to be deleted, it
                # means that work is already done
                if ipath['path'].startswith('delete://'):
                    # [elided: presumably "continue"]
                # [elided: "try:" header]
                    self.logger.debug("restore %s" % ipath['path'])
                    helper.restore(ipath, l_no, restore_callback)
                except Exception as e:
                    iipath = ipath.get ("path", "")
                    self.logger.error("FAILED to restore: {} ({})"
                                      # [elided: format arguments]
                    if disaster != tarfile.TOLERANCE_STRICT:
                        failed.append ((iipath, e))
                # [elided: presumably "continue"]

            # if both files are equal, we have nothing to restore
            if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
                # [elided: presumably "continue"]

            # we have to restore the file, but first we need to delete the
            # current existing file.
            # we don't delete the file if it's a directory, because it might
            # just have changed mtime, so it's quite inefficient to remove
            # [elided: continuation of this comment]
            if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
                helper.delete(upath)
            self.logger.debug("restore %s" % ipath['path'])
            # [elided: "try:" header]
                helper.restore(ipath, l_no, restore_callback)
            except Exception as e:
                if disaster == tarfile.TOLERANCE_STRICT:
                    # [elided: presumably "raise"]
                failed.append ((ipath.get ("path", ""), e))
                # [elided: presumably "continue"]

            # if the file is not in the index (so it comes from the target
            # directory) then we have to delete it
            # [elided: branch header, presumably "if ipath is None:"]
            self.logger.debug("delete %s" % upath)
            helper.delete(upath)

        helper.restore_directories_permissions()
        # [elided: cleanup (chdir back, helper.cleanup) and "return failed"]
1542 def recover_backup(self, target_path, backup_indexes_paths=[],
1543 restore_callback=None):
1545 Walk the index, extracting objects in disaster mode. Bad files are
1546 reported along with a reason.
1548 return self.restore_backup(target_path,
1549 backup_indexes_paths=backup_indexes_paths,
1550 disaster=tarfile.TOLERANCE_RECOVER)
    def rescue_backup(self, target_path, backup_tar_path,
                      restore_callback=None):
        """
        More aggressive “unfsck” mode: do not rely on the index data as the
        files may be corrupt; skim files for header-like information and
        attempt to retrieve the data.

        NOTE(review): this listing is elided — lines of the original source
        are missing at the points marked below.
        """
        def gen_volume_name (nvol):
            # Map a volume number to the full path of that volume file,
            # reusing the configured volume naming scheme.
            return os.path.join (os.path.dirname (backup_tar_path),
                                 self.volume_name_func (backup_tar_path,
                                 # [elided: remaining arguments and parens]

        # scan the volumes for recoverable members, building a raw
        # in-memory index
        backup_index = tarfile.gen_rescue_index (gen_volume_name,
                                                 # [elided: argument]
                                                 password=self.password,
                                                 key=self.crypto_key)

        # restore from the synthesized index with maximum fault tolerance
        return self.restore_backup(target_path,
                                   backup_index=backup_index,
                                   backup_tar_path=backup_tar_path,
                                   disaster=tarfile.TOLERANCE_RESCUE)
1577 def _parse_json_line(self, f, l_no):
1579 Read line from file like object and process it as JSON.
1584 j = json.loads(l.decode('UTF-8'))
1585 except UnicodeDecodeError as e:
1586 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1588 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1589 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1592 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1593 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1595 except ValueError as e:
1596 raise Exception("error parsing this json line "
1597 "(line number %d): %s" % (l_no, l))
class RestoreHelper(object):
    """
    Class used to help to restore files from indices

    NOTE(review): this listing is elided — lines of the original source are
    missing at the points marked below.
    """
    # holds the dicts of data
    # [elided: presumably "_data = []"]

    # list of directories to be restored. This is done as a last step, see
    # tarfile.extractall for details.
    # [elided: presumably "_directories = []"]

    # fault-tolerance level applied while extracting
    _disaster = tarfile.TOLERANCE_STRICT

    def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
                 backup_index=None, tarobj=None,
                 disaster=tarfile.TOLERANCE_STRICT):
        """
        Constructor opens the tars and init the data structures.

        Assumptions:

            - Index list must be provided in reverse order (newer first).
            - “newer first” apparently means that if there are n backups
              provided, the last full backup is at index n-1 and the most recent
              diff backup is at index 0.
            - Only the first, the second, and the last elements of
              ``index_list`` are relevant, others will not be accessed.
            - If no ``index_list`` is provided, both ``tarobj`` and
              ``backup_path`` must be passed.
            - If ``index_list`` is provided, the values of ``tarobj`` and
              ``backup_path`` are ignored.
        """
        self._directories = []
        self._deltatar = deltatar
        # [elided: presumably "self._cwd = cwd" — self._cwd is used below]
        self._password = deltatar.password
        self._crypto_key = deltatar.crypto_key
        self._decryptors = []
        self._disaster = disaster

        # only root can chown restored entries to arbitrary owners
        # ("pwd" is the module import, absent on non-POSIX platforms)
        if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
            self.canchown = True
        # [elided: "else:"]
            self.canchown = False

        # disaster mode: a raw in-memory index was passed instead of paths
        if isinstance (backup_index, list) is True:
            decryptor = self._deltatar.decryptor
            # [elided: "self._data =" assignment header; one per-backup
            #  state dict follows]
                [{ "curr_vol_no" : None
                 # [elided: further entries (vol_fd, tarobj, is_full, ...)]
                 , "path" : backup_path
                 # [elided]
                 , "last_itelement" : None
                 # [elided]
                 , "new_volume_handler" :
                            partial(self.new_volume_handler,
                                self._deltatar, self._cwd, True,
                                os.path.dirname(backup_path), decryptor)
                 , "decryptor" : decryptor
                 # [elided: closing of the dict and list]
        elif index_list is not None:
            for index in index_list:
                # the last entry of the list is the full backup
                is_full = index == index_list[-1]
                # [elided: presumably "decryptor = None"]
                if self._password is not None:
                    decryptor = crypto.Decrypt (password=self._password,
                                                key=self._crypto_key)

                # make paths absolute to avoid cwd problems
                if not os.path.isabs(index):
                    index = os.path.normpath(os.path.join(cwd, index))

                # [elided: "s = dict(" header and the leading entries of the
                #  per-backup state dict]
                    last_itelement = None,
                    # [elided]
                    new_volume_handler = partial(self.new_volume_handler,
                        self._deltatar, self._cwd, is_full,
                        os.path.dirname(index), decryptor),
                    decryptor = decryptor
                # [elided: closing paren]
                self._data.append(s)
        # [elided: "else:" branch — restore directly from *tarobj*]
            # make paths absolute to avoid cwd problems
            if not os.path.isabs(backup_path):
                backup_path = os.path.normpath(os.path.join(cwd, backup_path))

            # update the new_volume_handler of tar_obj
            tarobj.new_volume_handler = partial(self.new_volume_handler,
                self._deltatar, self._cwd, True, os.path.dirname(backup_path),
                self._deltatar.decryptor)
            # [elided: "s = dict(" header and leading entries]
                last_itelement = None,
                # [elided]
                new_volume_handler = tarobj.new_volume_handler,
                decryptor = self._deltatar.decryptor
            # [elided: closing paren]
            self._data.append(s)

    # [elided: the "def" line of the cleanup method is not visible]
        """
        Closes all open files
        """
        for data in self._data:
            # [elided: guard, presumably "if data['vol_fd']:"]
                data['vol_fd'].close()
                data['vol_fd'] = None
            # [elided: guard, presumably "if data['tarobj']:"]
                data['tarobj'].close()
                data['tarobj'] = None

    def delete(self, path):
        # Remove *path* (file, link or directory tree) while preserving the
        # mtime of its parent directory.
        if not os.path.exists(path):
            # [elided: presumably "return" — nothing to delete]

        # to preserve parent directory mtime, we save it
        parent_dir = os.path.dirname(path) or os.getcwd()
        parent_dir_mtime = int(os.stat(parent_dir).st_mtime)

        if os.path.isdir(path) and not os.path.islink(path):
            # [elided: directory-tree vs. single-file removal branches]

        # now we restore parent_directory mtime
        os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))

    def restore(self, itpath, l_no, callback=None):
        """
        Restore the path from the appropriate backup. Receives the current path
        from the newest (=first) index iterator. itpath must be not null.
        callback is a custom function that gets called for every file.

        NB: This function takes the attribute ``_data`` as input but will only
        ever use its first and, if available, second element. Anything else in
        ``._data[]`` will be ignored.
        """
        path = itpath['path']

        # Calls the callback function
        # [elided: guard and the call of *callback*]

        if path.startswith('delete://'):
            # the file has previously been deleted already in restore_backup in
            # all cases so we just need to finish
            # [elided: presumably "return"]

        # get data from newest index (_data[0])
        data = self._data[0]
        upath = self._deltatar.unprefixed(path)

        # to preserve parent directory mtime, we save it
        parent_dir = os.path.dirname(upath) or os.getcwd()
        os.makedirs(parent_dir, exist_ok=True)
        parent_dir_mtime = int(os.stat(parent_dir).st_mtime)

        # if path is found in the newest index as to be snapshotted, deal with it
        # [elided: continuation of this comment]
        if path.startswith('snapshot://'):
            self.restore_file(itpath, data, path, l_no, upath)

            # now we restore parent_directory mtime
            os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
            # [elided: presumably "return"]

        # we go from index to index, finding the path in the index, then finding
        # the index with the most recent snapshot of the file being restored
        #
        # Right now we support diff backups, only. No incremental backups.
        # As a result _data[0] is always the diff backup index
        # and _data[1] the full backup index.
        if len(self._data) == 2:
            data = self._data[1]
            d, l_no, dpath = self.find_path_in_index(data, upath)
            # [elided: not-found guard, presumably "if not d:"]
                self._deltatar.logger.warning('Error restoring file %s from '
                    'index, not found in index %s' % (path, data['path']))
                # [elided: presumably "return"]

            cur_path = d.get('path', '')
            if cur_path.startswith('delete://'):
                self._deltatar.logger.warning(('Strange thing happened, file '
                    '%s was listed in first index but deleted by another '
                    'one. Path was ignored and untouched.') % path)
                # [elided: presumably "return"]
            elif cur_path.startswith('snapshot://'):
                # this code path is reached when the file is unchanged
                # in the newest index and therefore of type 'list://'
                self.restore_file(d, data, path, l_no, dpath)

                # now we restore parent_directory mtime
                os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
                # [elided: presumably "return"]

        # error code path is reached when:
        # a) we have more than two indexes (unsupported atm)
        # b) both indexes contain a list:// entry (logic error)
        # c) we have just one index and it also contains list://
        self._deltatar.logger.warning(('Error restoring file %s from index, '
            'snapshot not found in any index') % path)

    def find_path_in_index(self, data, upath):
        # Scan the index belonging to *data* for the entry whose unprefixed
        # path equals *upath*, resuming from the last position when possible.
        #
        # NOTE: we restart the iterator sometimes because the iterator can be
        # walked over completely multiple times, for example if one path if not
        # found in one index and we have to go to the next index.
        it = data['iterator']
        # [elided: guard, presumably "if it is None:" — fresh iterator]
            it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
            d, l_no = it.__next__()
        # [elided: "else:" — resume from the remembered element]
            d = data['last_itelement']
            l_no = data['last_lno']

        # [elided: loop header]
            dpath = self._deltatar.unprefixed(d.get('path', ''))
            # [elided: match test, presumably "if upath == dpath:"]
                data['last_itelement'] = d
                data['last_lno'] = l_no
                return d, l_no, dpath

            up, dp = self._deltatar.compare_indexes(upath, dpath)
            # any time upath should have appeared before current dpath, it means
            # upath is just not in this index and we should stop
            # [elided: guard on *up*]
                data['last_itelement'] = d
                data['last_lno'] = l_no
                # [elided: "not found" return]

            # [elided: "try:" header]
                d, l_no = it.__next__()
            except StopIteration:
                data['last_itelement'] = d
                data['last_lno'] = l_no
                # [elided: "not found" return]

    def restore_directories_permissions(self):
        """
        Restore directory permissions when everything have been restored
        """
        # [elided: "try:" header]
            # deepest directories first, so parent mtimes survive child writes
            self._directories.sort(key=operator.attrgetter('name'))
            self._directories.reverse()

            # Set correct owner, mtime and filemode on directories.
            for member in self._directories:
                dirpath = member.name
                # [elided: "try:" header]
                    os.chmod(dirpath, member.mode)
                    os.utime(dirpath, (member.mtime, member.mtime))
                    # [elided: ownership-restore guard, root only]
                        # We have to be root to do so.
                        # [elided: "try:" header]
                            g = grp.getgrnam(member.gname)[2]
                        # [elided: fallback, presumably to member.gid]
                            u = pwd.getpwnam(member.uname)[2]
                        # [elided: fallback, presumably to member.uid]
                        # [elided: "try:" header]
                            # NOTE(review): "member.issym" is the boolean
                            # stored by add_member_dir (not a method call).
                            if member.issym and hasattr(os, "lchown"):
                                os.lchown(dirpath, u, g)
                            # [elided: "else:"]
                                os.chown(dirpath, u, g)
                        except EnvironmentError:
                            raise tarfile.ExtractError("could not change owner")

                except tarfile.ExtractError as e:
                    self._deltatar.logger.warning('tarfile: %s' % e)
        # [elided: outer exception handling]

    # [elided: presumably a "@staticmethod" decorator — the signature takes
    #  no "self" and the constructor binds it via partial(); TODO confirm]
    def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
        """
        Handles the new volumes
        """
        volume_name = deltarobj.volume_name_func(backup_path, is_full,
            volume_number, guess_name=True)
        volume_path = os.path.join(backup_path, volume_name)

        # we convert relative paths into absolute because CWD is changed
        if not os.path.isabs(volume_path):
            volume_path = os.path.join(cwd, volume_path)
        tarobj.open_volume(volume_path, encryption=encryption)

    def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
        """
        Restores a snapshot of a file from a specific backup
        """
        op_type = file_data.get('type', -1)
        member = file_data.get('member', None)
        ismember = bool(member)

        # when member is set, then we can assume everything is right and we
        # just have to restore the path
        # [elided: guard, presumably "if member is None:"]
            vol_no = file_data.get('volume', -1)
            # sanity check
            if not isinstance(vol_no, int) or vol_no < 0:
                self._deltatar.logger.warning('unrecognized type to be restored: '
                    '%s, line %d' % (op_type, l_no))

            # setup the volume that needs to be read. only needed when member is
            # [elided: continuation — "not set"]
            if index_data['curr_vol_no'] != vol_no:
                index_data['curr_vol_no'] = vol_no
                backup_path = os.path.dirname(index_data['path'])
                vol_name = self._deltatar.volume_name_func(backup_path,
                    index_data['is_full'], vol_no, guess_name=True)
                vol_path = os.path.join(backup_path, vol_name)
                if index_data['vol_fd']:
                    index_data['vol_fd'].close()
                index_data['vol_fd'] = open(vol_path, 'rb')

                # force reopen of the tarobj because of new volume
                if index_data['tarobj']:
                    index_data['tarobj'].close()
                    index_data['tarobj'] = None

            # seek tarfile if needed
            offset = file_data.get('offset', -1)
            if index_data['tarobj']:
                if self._disaster == tarfile.TOLERANCE_RESCUE:
                    # force a seek and reopen
                    index_data['tarobj'].close()
                    index_data['tarobj'] = None
                # [elided: "else:" with a "try:" header — probe whether the
                #  already-open tarobj is positioned at the wanted member]
                        member = index_data['tarobj'].__iter__().__next__()
                    except tarfile.DecryptionError:
                        # [elided: presumably "pass" — fall through to reopen]
                    except tarfile.CompressionError:
                        # [elided: presumably "pass" — fall through to reopen]

                    if not member or member.path != file_data['path']:
                        # force a seek and reopen
                        index_data['tarobj'].close()
                        index_data['tarobj'] = None

            # open the tarfile if needed
            if not index_data['tarobj']:
                index_data['vol_fd'].seek(offset)
                index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
                    fileobj=index_data['vol_fd'],
                    format=tarfile.GNU_FORMAT,
                    concat='#' in self._deltatar.mode,
                    encryption=index_data["decryptor"],
                    new_volume_handler=index_data['new_volume_handler'],
                    save_to_members=False,
                    tolerance=self._disaster)

                member = index_data['tarobj'].__iter__().__next__()

        # extract under the unprefixed target name
        member.path = unprefixed_path
        member.name = unprefixed_path

        if op_type == 'directory':
            # final permissions/mtime are applied later by
            # restore_directories_permissions()
            self.add_member_dir(member)
            member = copy.copy(member)
            # temporary permissive mode so children can be written inside
            member.mode = 0o0700

            # if it's an existing directory, we then don't need to recreate it
            # just set the right permissions, mtime and that kind of stuff
            if os.path.exists(member.path):
                # [elided: presumably "return"]

        # [elided: guard, presumably "if not ismember:"]
            # set current volume number in tarobj, otherwise the extraction of the
            # file might fail when trying to extract a multivolume member
            index_data['tarobj'].volume_number = index_data['curr_vol_no']

        def ignore_symlink (member, *_args):
            # symlinks are skipped with a warning instead of being created
            self._deltatar.logger.warning("Ignoring symlink %s" % member.name)

        # finally, restore the file
        index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)

    def add_member_dir(self, member):
        """
        Add member dir to be restored at the end
        """
        if not self.canchown:
            # without chown rights only mode and mtime can be restored
            self._directories.append(DirItem(name=member.name, mode=member.mode,
                mtime=member.mtime))
        # [elided: "else:" — record ownership data too]
            self._directories.append(DirItem(name=member.name, mode=member.mode,
                mtime=member.mtime, gname=member.gname, uname=member.uname,
                uid=member.uid, gid=member.gid, issym=member.issym()))
2027 class DirItem(object):
2028 def __init__(self, **kwargs):
2029 for k, v in kwargs.items():