3 # Copyright (C) 2013, 2014 Intra2net AG
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published
7 # by the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see
17 # <http://www.gnu.org/licenses/lgpl-3.0.html>
19 DELTATAR_HEADER_VERSION = 1
20 DELTATAR_PARAMETER_VERSION = 1
33 from functools import partial
38 class NullHandler(logging.Handler):
39 def emit(self, record):
43 logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
51 # encryption direction
52 CRYPTO_MODE_ENCRYPT = 0
53 CRYPTO_MODE_DECRYPT = 1
55 # The canonical extension for encrypted backup files regardless of the actual
56 # encryption parameters is “.pdtcrypt”. This is analogous to the encryption
57 # header which starts with the eight ASCII bytes “PDTCRYPT”. Historical note:
58 # Since the introduction of the versioned header there is no longer any need
59 # for encoding encryption parameters in the file extensions (“.aes128” and
61 PDTCRYPT_EXTENSION = "pdtcrypt"
65 AUXILIARY_FILE_INDEX = 0
66 AUXILIARY_FILE_INFO = 1
68 class DeltaTar(object):
70 Backup class used to create backups
73 # list of files to exclude in the backup creation or restore operation. It
74 # can contain python regular expressions.
77 # list of files to include in the backup creation or restore operation. It
78 # can contain python regular expressions. If empty, all files in the source
79 # path will be backed up (when creating a backup) or all the files in the
80 # backup will be restored (when restoring a backup), but if included_files
81 # is set then only the files included in the list will be processed.
84 # custom filter of files to be backed up (or restored). Unused and unset
85 # by default. The function receives a file path and must return a boolean.
88 # mode in which the delta will be created (when creating a backup) or
89 # opened (when restoring). Accepts modes analogous to those of the tarfile library.
92 # used together with aes modes to encrypt and decrypt backups.
97 # parameter version to use when encrypting; note that this has no effect
98 # on decryption since the required settings are determined from the headers
99 crypto_version = DELTATAR_HEADER_VERSION
100 crypto_paramversion = None
102 # when encrypting or decrypting, these hold crypto handlers; created before
103 # establishing the Tarfile stream iff a password is supplied.
107 # python logger object.
110 # specifies the index mode in the same format as @param mode, but without
111 # the ':', '|' or '#' at the beginning. It doesn't make sense to specify
112 # that the index is encrypted if no password is given in the constructor.
115 # current time for this backup. Used for file names and file creation checks
118 # extra data to be included in the header of the index file when creating a
122 # valid tarfile modes and their corresponding default file extension
123 __file_extensions_dict = {
132 '#gz.pdtcrypt': '.gz',
137 # valid index modes and their corresponding default file extension
138 __index_extensions_dict = {
142 'gz.pdtcrypt': '.gz',
146 # valid path prefixes
147 __path_prefix_list = [
153 def __init__(self, excluded_files=[], included_files=[],
154 filter_func=None, mode="", password=None,
155 crypto_key=None, nacl=None,
156 crypto_version=DELTATAR_HEADER_VERSION,
157 crypto_paramversion=DELTATAR_PARAMETER_VERSION,
158 logger=None, index_mode=None, index_name_func=None,
159 volume_name_func=None):
161 Constructor. Configures the diff engine.
164 - excluded_files: list of files to exclude in the backup creation or
165 restore operation. It can contain python regular expressions.
167 - included_files: list of files to include in the backup creation or
168 restore operation. It can contain python regular expressions. If
169 empty, all files in the source path will be backed up (when creating a
170 backup) or all the files in the backup will be restored (when
171 restoring a backup), but if included_files is set then only the files
172 included in the list will be processed.
174 - filter_func: custom filter of files to be backed up (or restored).
175 Unused and unset by default. The function receives a file path and
176 must return a boolean.
178 - mode: mode in which the delta will be created (when creating a backup)
179 or opened (when restoring). Accepts the same modes as the tarfile
180 library. Valid modes are:
183 ':' open uncompressed
184 ':gz' open with gzip compression
185 ':bz2' open with bzip2 compression
186 '|' open an uncompressed stream of tar blocks
187 '|gz' open a gzip compressed stream of tar blocks
188 '|bz2' open a bzip2 compressed stream of tar blocks
189 '#gz' open a stream of concat gzip compressed tar blocks
191 - crypto_key: used to encrypt and decrypt backups. Encryption will
192 be enabled automatically if a key is supplied. Requires a salt to be
195 - nacl: salt that was used to derive the encryption key for embedding
196 in the PDTCRYPT header. Not needed when decrypting and when
197 encrypting with password.
199 - password: used to encrypt and decrypt backups. Encryption will be
200 enabled automatically if a password is supplied.
202 - crypto_version: version of the format, determining the kind of PDT
205 - crypto_paramversion: optionally request encryption conforming to
206 a specific parameter version. Defaults to the standard PDT value
207 which as of 2017 is the only one available.
209 - logger: python logger object. Optional.
211 - index_mode: specifies the index mode in the same format as @param
212 mode, but without the ':', '|' or '#' at the beginning. If encryption
213 is requested it will extend to the auxiliary (index, info) files as
214 well. This is an optional parameter that will automatically mimic
215 @param mode by default if not provided. Valid modes are:
218 'gz' open with gzip compression
219 'bz2' open with bzip2 compression
221 - index_name_func: function that sets a custom name for the index file.
222 This function receives a flag to indicate whether the name will be
223 used for a full or diff backup. The backup path will be prepended to
226 - volume_name_func: function that defines the name of tar volumes. It
227 receives the backup_path, whether it's a full backup, and the volume
228 number, and must return the corresponding volume name. Optional,
229 DeltaTar has default names for tar volumes.
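# A minimal usage sketch (paths, password and patterns below are
# hypothetical, not part of this module): build a DeltaTar object for
# concat-gzip volumes with password-based encryption, excluding temporary
# files, then create a full backup with it.
#
#     dtar = DeltaTar(mode='#gz', password='example-password',
#                     excluded_files=[r'.*\.tmp$'])
#     dtar.create_full_backup(source_path='/srv/data',
#                             backup_path='/backup/2014-10-01')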
232 if mode not in self.__file_extensions_dict:
233 raise Exception('Unrecognized extension mode=[%s] requested for files'
236 self.excluded_files = excluded_files
237 self.included_files = included_files
238 self.filter_func = filter_func
239 self.logger = logging.getLogger('deltatar.DeltaTar')
241 self.logger.addHandler(logger)
244 if crypto_key is not None:
245 self.crypto_key = crypto_key
246 self.nacl = nacl # encryption only
248 if password is not None:
249 self.password = password
251 if crypto_version is not None:
252 self.crypto_version = crypto_version
254 if crypto_paramversion is not None:
255 self.crypto_paramversion = crypto_paramversion
257 # generate index_mode
258 if index_mode is None:
264 elif mode not in self.__index_extensions_dict:
265 raise Exception('Unrecognized extension mode=[%s] requested for index'
268 self.index_mode = index_mode
269 self.current_time = datetime.datetime.now()
271 if index_name_func is not None:
272 self.index_name_func = index_name_func
274 if volume_name_func is not None:
275 self.volume_name_func = volume_name_func
277 def pick_extension(self, kind, mode=None):
279 Choose the extension depending on a) the kind of file given, b) the
280 processing mode, and c) the current encryption settings.
283 if kind == PDT_TYPE_ARCHIVE:
286 mode = self.__index_extensions_dict [self.index_mode]
288 if self.crypto_key is not None or self.password is not None:
289 ret += "." + PDTCRYPT_EXTENSION
292 def index_name_func(self, is_full): # pylint: disable=method-hidden
294 Callback for setting a custom name for the index file. Depending on
295 whether *is_full* is set, it will create a suitable name for a full
298 prefix = "bfull" if is_full else "bdiff"
299 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
300 extension = self.pick_extension \
302 self.__index_extensions_dict [self.index_mode])
304 return "%s-%s.index%s" % (prefix, date_str, extension)
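# For example, with current_time at 2014-10-01 12:00, index_mode 'gz'
# (assumed to map to the '.gz' extension) and no encryption configured,
# the default name produced above for a full backup would be
# "bfull-2014-10-01-1200.index.gz".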
306 def volume_name_func(self, backup_path, # pylint: disable=method-hidden
307 is_full, volume_number,
310 function that defines the name of tar volumes. It receives the
311 backup_path, whether it's a full backup, and the volume number, and must
312 return the corresponding volume name. Optional, DeltaTar has default
313 names for tar volumes.
315 If guess_name is activated, the file is intended not to be created but
316 to be found, and thus the date will be guessed.
318 prefix = "bfull" if is_full else "bdiff"
319 extension = self.pick_extension \
321 self.__file_extensions_dict [self.mode])
324 date_str = self.current_time.strftime("%Y-%m-%d-%H%M")
325 return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension)
327 prefix = prefix + "-"
328 postfix = "-%03d%s" % (volume_number + 1, extension)
329 for f in os.listdir(backup_path):
330 if f.startswith(prefix) and f.endswith(postfix):
332 raise Exception("volume not found")
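# For example, with current_time at 2014-10-01 12:00 and mode '#gz'
# (assumed to map to the '.gz' extension), the first volume of an
# unencrypted full backup would be named "bfull-2014-10-01-1200-001.gz";
# with guess_name set, the backup directory is searched instead for an
# existing file matching that prefix and the "-001.gz" postfix.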
335 def filter_path(self, path, source_path="", is_dir=None):
337 Filters a path, given the source_path, using the filtering properties
338 set in the constructor.
339 The filtering order is:
340 1. included_files (if any)
342 3. filter_func (which must return whether the file is accepted or not)
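# An illustrative sketch of the rules above (patterns are hypothetical):
# with included_files=['home/'] and excluded_files=['home/tmp/'],
# 'home/user/doc.txt' matches, 'home/tmp/cache' is excluded, and
# 'etc/passwd' yields NO_MATCH; filter_func, when set, is consulted last
# for paths that survived both lists.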
345 if len(source_path) > 0:
346 # ensure that exactly one '/' at end of dir is also removed
347 source_path = source_path.rstrip(os.sep) + os.sep
348 path = path[len(source_path):]
350 # 1. filter included_files
352 if len(self.included_files) > 0:
354 for i in self.included_files:
355 # it can be either a regexp or a string
356 if isinstance(i, str):
357 # if the string matches, then continue
362 # if the string ends with / it's a directory, and if the
363 # path is contained in it, it is included
364 if i.endswith('/') and path.startswith(i):
368 # if the string doesn't end with /, add it and do the same
370 elif path.startswith(i + '/'):
374 # check for PARENT_MATCH
377 if not dir_path.endswith('/'):
380 if i.startswith(dir_path):
383 # if it's a reg exp, then we just check if it matches
384 elif isinstance(i, re._pattern_type):
389 self.logger.warning('Invalid pattern in included_files: %s' % str(i))
391 if match == NO_MATCH:
394 # when a directory is in PARENT_MATCH, it doesn't matter if it's
395 # excluded. Its subfiles will be excluded, but the directory itself
397 if match != PARENT_MATCH:
398 for e in self.excluded_files:
399 # it can be either a regexp or a string
400 if isinstance(e, str):
401 # if the string matches, then exclude
405 # if the string ends with / it's a directory, and if the
406 # path starts with the directory, then exclude
407 if e.endswith('/') and path.startswith(e):
410 # if the string doesn't end with /, do the same check with
412 elif path.startswith(e + '/'):
415 # if it's a reg exp, then we just check if it matches
416 elif isinstance(e, re._pattern_type):
420 self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))
423 return self.filter_func(path)
427 def _recursive_walk_dir(self, source_path, keep_base_dir=False):
429 Walk a directory recursively, yielding each file/directory
431 Returns the path of an entity. If ``keep_base_dir`` is set,
432 the path returned contains the prefix ``source_path``; otherwise it is
433 relative to the prefix.
436 source_path = source_path.rstrip(os.sep)
441 beginning_size = len(source_path) + 1 # +1 for os.sep
443 queue = [source_path]
446 cur_path = queue.pop(0)
449 dfd = os.open (cur_path, os.O_DIRECTORY)
450 except FileNotFoundError as exn:
451 self.logger.warning ("failed to open entity [%s] as directory; "
452 "file system (error: %s); skipping"
453 % (cur_path, str (exn)))
457 for filename in sorted(os.listdir(dfd)):
458 child = os.path.join(cur_path, filename)
459 is_dir = os.path.isdir(child)
460 status = self.filter_path(child, source_path, is_dir)
461 if status == NO_MATCH:
463 if not os.access(child, os.R_OK):
464 self.logger.warning('Error accessing possibly locked file %s' % child)
468 yield child[beginning_size:]
470 if is_dir and (status == MATCH or status == PARENT_MATCH):
475 def _stat_dict(self, path):
477 Returns a dict with the stat data used to compare files
479 stinfo = os.stat(path)
480 mode = stinfo.st_mode
483 if stat.S_ISDIR(mode):
485 elif stat.S_ISREG(mode):
487 elif stat.S_ISLNK(mode):
494 u'mtime': int(stinfo.st_mtime),
495 u'ctime': int(stinfo.st_ctime),
496 u'uid': stinfo.st_uid,
497 u'gid': stinfo.st_gid,
498 u'inode': stinfo.st_ino,
499 u'size': stinfo.st_size
502 def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False):
504 Return whether the two dicts are equal in the stat keys
506 keys = [u'type', u'mode', u'size', u'mtime',
507 # not restored: u'inode', u'ctime'
510 # only check gid/uid when running as root; otherwise skip the check,
511 # because tarfile can only chown when running as superuser
513 # also, skip the check in rpmbuild since the sources end up with the
514 # uid:gid of the packager while the extracted files are 0:0.
515 if hasattr(os, "geteuid") and os.geteuid() == 0 \
516 and os.getenv ("RPMBUILD_OPTIONS") is None:
520 if (not d1 and d2 is not None) or (d1 is not None and not d2):
523 if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal):
526 type = d1.get('type', '')
529 # size doesn't matter for directories
530 if type == 'directory' and key == 'size':
532 if d1.get(key, -1) != d2.get(key, -2):
536 def prefixed(self, path, listsnapshot_equal=False):
538 if a path is not prefixed, return it prefixed
540 for prefix in self.__path_prefix_list:
541 if path.startswith(prefix):
542 if listsnapshot_equal and prefix == u'list://':
543 return u'snapshot://' + path[len(prefix):]
545 return u'snapshot://' + path
547 def unprefixed(self, path):
549 remove a path prefix if any
551 for prefix in self.__path_prefix_list:
552 if path.startswith(prefix):
553 return path[len(prefix):]
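# Example behaviour (sketch) of the two helpers above:
#     prefixed('etc/hosts')               -> 'snapshot://etc/hosts'
#     prefixed('list://etc/hosts', True)  -> 'snapshot://etc/hosts'
#     unprefixed('delete://etc/hosts')    -> 'etc/hosts'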
557 def initialize_encryption (self, mode):
558 password = self.password
559 key = self.crypto_key
562 if key is None and password is None:
564 if mode == CRYPTO_MODE_ENCRYPT:
565 return crypto.Encrypt (password=password,
568 version=self.crypto_version,
569 paramversion=self.crypto_paramversion)
570 if mode == CRYPTO_MODE_DECRYPT:
571 return crypto.Decrypt (password=password, key=key)
573 raise Exception ("invalid encryption mode [%r]" % mode)
576 def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX):
578 Given the specified configuration, opens a file for reading or writing,
579 inheriting the encryption and compression settings from the backup.
580 Returns a file object ready to use.
582 :param mode: IO mode (read or write, ``"r"`` and ``"w"``,
585 :param kind: Role of the file, see AUXILIARY_FILE_* constants.
586 Both the info and the index file have a globally
587 unique, constant counter value.
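# For instance, create_full_backup below obtains its index sink through
# this method, inheriting compression and encryption from the backup:
#     index_sink = self.open_auxiliary_file(index_path, 'w',
#                                           kind=AUXILIARY_FILE_INDEX)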
590 if self.index_mode.startswith('gz'):
592 elif self.index_mode.startswith('bz2'):
600 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
602 crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
604 if crypto_ctx is not None:
605 if kind == AUXILIARY_FILE_INFO:
606 enccounter = crypto.AES_GCM_IV_CNT_INFOFILE
607 elif kind == AUXILIARY_FILE_INDEX:
608 enccounter = crypto.AES_GCM_IV_CNT_INDEX
610 raise Exception ("invalid kind of aux file %r" % kind)
612 sink = tarfile._Stream(name=path, mode=mode, comptype=comptype,
613 bufsize=tarfile.RECORDSIZE, fileobj=None,
614 encryption=crypto_ctx, enccounter=enccounter)
619 def create_full_backup(self, source_path, backup_path,
620 max_volume_size=None, extra_data=dict()):
622 Creates a full backup.
625 - source_path: source path to the directory to back up.
626 - backup_path: path where the backup will be stored. Backup path will
627 be created if not existent.
628 - max_volume_size: maximum volume size in megabytes. Used to split the
629 backup in volumes. Optional (won't split in volumes by default).
630 - extra_data: a json-serializable dictionary with information that you
631 want to be included in the header of the index file
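# Hedged usage sketch (paths and extra_data are hypothetical):
#     dtar.create_full_backup('/srv/data', '/backup/2014-10-01',
#                             max_volume_size=100,  # split volumes at 100 MB
#                             extra_data={'origin': 'host1'})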
634 if not isinstance(source_path, str):
635 raise Exception('Source path must be a string')
637 if not isinstance(backup_path, str):
638 raise Exception('Backup path must be a string')
640 if not os.path.exists(source_path) or not os.path.isdir(source_path):
641 raise Exception('Source path "%s" does not exist or is not a '\
642 'directory' % source_path)
644 if max_volume_size is not None and (not isinstance(max_volume_size, int) or\
645 max_volume_size < 1):
646 raise Exception('max_volume_size must be a positive integer')
647 if max_volume_size is not None:
648 max_volume_size = max_volume_size*1024*1024
650 if not isinstance(extra_data, dict):
651 raise Exception('extra_data must be a dictionary')
654 extra_data_str = json.dumps(extra_data)
656 raise Exception('extra_data is not json-serializable')
658 if not os.access(source_path, os.R_OK):
659 raise Exception('Source path "%s" is not readable' % source_path)
661 # try to create backup path if needed
662 os.makedirs(backup_path, exist_ok=True)
664 if not os.access(backup_path, os.W_OK):
665 raise Exception('Backup path "%s" is not writeable' % backup_path)
667 if source_path.endswith('/'):
668 source_path = source_path[:-1]
670 if backup_path.endswith('/'):
671 backup_path = backup_path[:-1]
673 # update current time
674 self.current_time = datetime.datetime.now()
676 if self.mode not in self.__file_extensions_dict:
677 raise Exception('Unrecognized extension')
679 # setup for encrypting payload
680 if self.encryptor is None:
681 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
683 # some initialization
686 # generate the first volume name
687 vol_name = self.volume_name_func(backup_path, True, 0)
688 tarfile_path = os.path.join(backup_path, vol_name)
691 index_name = self.index_name_func(True)
692 index_path = os.path.join(backup_path, index_name)
693 index_sink = self.open_auxiliary_file(index_path, 'w')
697 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
699 Handles the new volumes
701 volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
702 volume_path = os.path.join(backup_path, volume_name)
703 deltarobj.vol_no = volume_number
705 # we convert relative paths into absolute because CWD is changed
706 if not os.path.isabs(volume_path):
707 volume_path = os.path.join(cwd, volume_path)
709 if tarobj.fileobj is not None:
710 tarobj.fileobj.close()
712 deltarobj.logger.debug("opening volume %s" % volume_path)
714 tarobj.open_volume(volume_path, encryption=encryption)
716 # wraps some args from context into the handler
717 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor)
719 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
721 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
722 # calculate checksum and write into the stream
723 crc = binascii.crc32(s) & 0xFFFFffff
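# Note: the file-list checksum written at the end is a plain CRC-32
# chained over every JSON line from BEGIN-FILE-LIST through END-FILE-LIST
# (newlines included).  A reader could verify it independently with
# something like:
#     crc = 0
#     for line in file_list_lines:              # each line as bytes
#         crc = binascii.crc32(line, crc) & 0xFFFFFFFF
#     assert crc == checksum_from_index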
726 # start creating the tarfile
727 tarobj = tarfile.TarFile.open(tarfile_path,
728 mode='w' + self.mode,
729 format=tarfile.GNU_FORMAT,
730 concat='#' in self.mode,
731 encryption=self.encryptor,
732 max_volume_size=max_volume_size,
733 new_volume_handler=new_volume_handler,
734 save_to_members=False,
736 os.chdir(source_path)
738 # for each file to be in the backup, do:
739 for path in self._recursive_walk_dir('.'):
742 # calculate stat dict for current file
743 statd = self._stat_dict(path)
744 statd['path'] = u'snapshot://' + statd['path']
745 statd['volume'] = self.vol_no
748 tarobj.add(path, arcname = statd['path'], recursive=False)
749 except FileNotFoundError as exn:
750 # file vanished since the call to access(3) above
751 self.logger.warning ("object [%s] no longer available in "
752 "file system (error: %s); skipping"
754 continue # prevent indexing
756 # retrieve file offset
757 statd['offset'] = tarobj.get_last_member_offset()
758 self.logger.debug("backup %s" % statd['path'])
760 # store the stat dict in the index
761 s = bytes(json.dumps(statd) + '\n', 'UTF-8')
762 crc = binascii.crc32(s, crc) & 0xffffffff
765 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
766 crc = binascii.crc32(s, crc) & 0xffffffff
768 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
773 index_sink.close (close_fileobj=True)
775 def create_diff_backup(self, source_path, backup_path, previous_index_path,
776 max_volume_size=None, extra_data=dict()):
781 - source_path: source path to the directory to back up.
782 - backup_path: path where the backup will be stored. Backup path will
783 be created if not existent.
784 - previous_index_path: index of the previous backup, needed to know
785 which files changed since then.
786 - max_volume_size: maximum volume size in megabytes (MB). Used to split
787 the backup in volumes. Optional (won't split in volumes by default).
789 NOTE: the previous index is assumed to follow exactly the same format as
790 the index_mode configured in the constructor.
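# Hedged usage sketch (paths are hypothetical): a diff backup taken
# against the index written by an earlier full backup:
#     dtar.create_diff_backup('/srv/data', '/backup/2014-10-02',
#         previous_index_path='/backup/2014-10-01/bfull-2014-10-01-1200.index.gz')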
792 # check/sanitize input
793 if not isinstance(source_path, str):
794 raise Exception('Source path must be a string')
796 if not isinstance(backup_path, str):
797 raise Exception('Backup path must be a string')
799 if not os.path.exists(source_path) or not os.path.isdir(source_path):
800 raise Exception('Source path "%s" does not exist or is not a '\
801 'directory' % source_path)
803 if not isinstance(extra_data, dict):
804 raise Exception('extra_data must be a dictionary')
807 extra_data_str = json.dumps(extra_data)
809 raise Exception('extra_data is not json-serializable')
811 if not os.access(source_path, os.R_OK):
812 raise Exception('Source path "%s" is not readable' % source_path)
814 if max_volume_size is not None and (not isinstance(max_volume_size, int) or\
815 max_volume_size < 1):
816 raise Exception('max_volume_size must be a positive integer')
817 if max_volume_size is not None:
818 max_volume_size = max_volume_size*1024*1024
820 if not isinstance(previous_index_path, str):
821 raise Exception('previous_index_path must be a string')
823 if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
824 raise Exception('Index path "%s" does not exist or is not a '\
825 'file' % previous_index_path)
827 if not os.access(previous_index_path, os.R_OK):
828 raise Exception('Index path "%s" is not readable' % previous_index_path)
830 # try to create backup path if needed
831 os.makedirs(backup_path, exist_ok=True)
833 if not os.access(backup_path, os.W_OK):
834 raise Exception('Backup path "%s" is not writeable' % backup_path)
836 if source_path.endswith('/'):
837 source_path = source_path[:-1]
839 if backup_path.endswith('/'):
840 backup_path = backup_path[:-1]
842 # update current time
843 self.current_time = datetime.datetime.now()
845 if self.mode not in self.__file_extensions_dict:
846 raise Exception('Unrecognized extension')
848 # setup for encrypting payload
849 if self.encryptor is None:
850 self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)
852 # some initialization
855 # generate the first volume name
856 vol_name = self.volume_name_func(backup_path, is_full=False,
858 tarfile_path = os.path.join(backup_path, vol_name)
863 index_name = self.index_name_func(is_full=False)
864 index_path = os.path.join(backup_path, index_name)
865 index_sink = self.open_auxiliary_file(index_path, 'w')
867 def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
869 Handles the new volumes
871 volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
872 volume_number=volume_number)
873 volume_path = os.path.join(backup_path, volume_name)
874 deltarobj.vol_no = volume_number
876 # we convert relative paths into absolute because CWD is changed
877 if not os.path.isabs(volume_path):
878 volume_path = os.path.join(cwd, volume_path)
880 deltarobj.logger.debug("opening volume %s" % volume_path)
881 tarobj.open_volume(volume_path)
883 # wraps some args from context into the handler
884 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
886 index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
888 s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
889 # calculate checksum and write into the stream
890 crc = binascii.crc32(s) & 0xFFFFffff
893 # start creating the tarfile
894 tarobj = tarfile.TarFile.open(tarfile_path,
895 mode='w' + self.mode,
896 format=tarfile.GNU_FORMAT,
897 concat='#' in self.mode,
898 encryption=self.encryptor,
899 max_volume_size=max_volume_size,
900 new_volume_handler=new_volume_handler,
901 save_to_members=False,
905 # create the iterators, first the previous index iterator, then the
906 # source path directory iterator and collate and iterate them
907 if not os.path.isabs(previous_index_path):
908 previous_index_path = os.path.join(cwd, previous_index_path)
909 index_it = self.iterate_index_path(previous_index_path)
911 os.chdir(source_path)
912 dir_it = self._recursive_walk_dir('.')
913 dir_path_it = self.jsonize_path_iterator(dir_it)
921 # for each file to be in the backup, do:
922 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
924 # if file is not in the index, it means it's a new file, so we have
929 # if the file is not in the directory iterator, it means that it has
930 # been deleted, so we need to mark it as such
933 # if the file is in both iterators, it means it might have either
934 # not changed (in which case we will just list it in our index but
935 # it will not be included in the tar file), or it might have
936 # changed, in which case we will snapshot it.
937 elif ipath and dpath:
938 if self._equal_stat_dicts(ipath, dpath):
942 # TODO: when creating chained backups (i.e. diffing from another
943 # diff), we will need to detect the type of action in the previous
944 # index, because if it was delete and dpath is None, we should
947 if action == 'snapshot':
948 # calculate stat dict for current file
950 stat['path'] = "snapshot://" + dpath['path']
951 stat['volume'] = self.vol_no
953 self.logger.debug("[STORE] %s" % dpath['path'])
956 tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
957 # retrieve file offset
958 stat['offset'] = tarobj.get_last_member_offset()
959 except FileNotFoundError as exn:
960 # file vanished since the call to access(3) above
961 self.logger.warning ("object [%s] no longer available in "
962 "file system (error: %s); skipping"
963 % (dpath ["path"], str (exn)))
964 stat = None # prevent indexing
966 elif action == 'delete':
967 path = self.unprefixed(ipath['path'])
969 u'path': u'delete://' + path,
970 u'type': ipath['type']
972 self.logger.debug("[DELETE] %s" % path)
974 # mark it as deleted in the backup
975 tarobj.add("/dev/null", arcname=stat['path'])
976 elif action == 'list':
978 path = self.unprefixed(ipath['path'])
979 stat['path'] = u'list://' + path
980 # unchanged files do not enter the backup, only the index
981 self.logger.debug("[UNCHANGED] %s" % path)
984 self.logger.warning('unknown action in create_diff_backup: {0}'
989 # store the stat dict in the index
990 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
991 crc = binascii.crc32(s, crc) & 0xffffffff
994 s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
995 crc = binascii.crc32(s, crc) & 0xffffffff
997 s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
1006 def iterate_index_path(self, index_path):
1008 Returns an index iterator. Internally, it uses a classic iterator class.
1009 We do that instead of just yielding so that the iterator object can have
1010 an additional function to close the file descriptor that is opened in
1014 class IndexPathIterator(object):
1015 def __init__(self, delta_tar, index_path):
1016 self.delta_tar = delta_tar
1017 self.index_path = index_path
1019 self.extra_data = dict()
1029 def __enter__(self):
1031 Allows this iterator to be used with the "with" statement
1034 self.f = self.delta_tar.open_auxiliary_file(self.index_path, 'r')
1035 # check index header
1036 j, l_no = self.delta_tar._parse_json_line(self.f, 0)
1037 if j.get("type", '') != 'python-delta-tar-index' or\
1038 j.get('version', -1) != 1:
1039 raise Exception("invalid index file format: %s" % json.dumps(j))
1041 self.extra_data = j.get('extra_data', dict())
1043 # find BEGIN-FILE-LIST, ignore other headers
1045 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1046 if j.get('type', '') == 'BEGIN-FILE-LIST':
1050 def __exit__(self, type, value, tb):
1052 Allows this iterator to be used with the "with" statement
1059 # read each file in the index and process it to do the restore
1063 j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
1064 except Exception as e:
1069 op_type = j.get('type', '')
1071 # when we detect the end of the list, break the loop
1072 if op_type == 'END-FILE-LIST':
1078 if op_type not in ['directory', 'file', 'link']:
1079 self.delta_tar.logger.warning('unrecognized type to be '
1080 'restored: %s, line %d' % (op_type, l_no))
1082 return self.__next__()
1086 return IndexPathIterator(self, index_path)
1088 def iterate_tar_path(self, tar_path, new_volume_handler=None):
1090 Returns a tar iterator that iterates jsonized member items that contain
1091 an additional "member" field, used by RestoreHelper.
1093 class TarPathIterator(object):
1094 def __init__(self, delta_tar, tar_path, new_volume_handler=None):
1095 self.delta_tar = delta_tar
1096 self.tar_path = tar_path
1098 self.last_member = None
1099 self.new_volume_handler = new_volume_handler
1107 self.tar_obj.close()
1109 def __enter__(self):
1111 Allows this iterator to be used with the "with" statement
1113 if self.tar_obj is None:
1115 if self.delta_tar.password is not None:
1116 decryptor = crypto.Decrypt \
1117 (password=self.delta_tar.password,
1118 key=self.delta_tar.crypto_key)
1119 self.tar_obj = tarfile.TarFile.open(self.tar_path,
1120 mode='r' + self.delta_tar.mode,
1121 format=tarfile.GNU_FORMAT,
1122 concat='#' in self.delta_tar.mode,
1123 encryption=decryptor,
1124 new_volume_handler=self.new_volume_handler,
1125 save_to_members=False,
1129 def __exit__(self, type, value, tb):
1131 Allows this iterator to be used with the "with" statement
1134 self.tar_obj.close()
1139 Read each member and return it as a stat dict
1141 tarinfo = self.tar_obj.__iter__().__next__()
1142 # NOTE: here we compare if tarinfo.path is the same as before
1143 # instead of comparing the tarinfo object itself because the
1144 # object itself might change for multivol tarinfos
1145 if tarinfo is None or (self.last_member is not None and\
1146 self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
1149 self.last_member = tarinfo
1152 if tarinfo.isfile():
1154 elif tarinfo.isdir():
1156 elif tarinfo.islnk() or tarinfo.issym():
1161 u'path': tarinfo.path,
1162 u'mode': tarinfo.mode,
1163 u'mtime': tarinfo.mtime,
1164 u'ctime': -1, # cannot restore
1165 u'uid': tarinfo.uid,
1166 u'gid': tarinfo.gid,
1167 u'inode': -1, # cannot restore
1168 u'size': tarinfo.size,
1172 return TarPathIterator(self, tar_path, new_volume_handler)
1174 def jsonize_path_iterator(self, iter, strip=0):
1176 converts the yielded items of an iterator into json path lines.
1178 strip: Strip the smallest prefix containing num leading slashes from
1183 path = iter.__next__()
1185 yield self._stat_dict(path), 0
1187 st = self._stat_dict(path)
1188 st['path'] = "/".join(path.split("/")[strip:])
1190 except StopIteration:
1193 def iterate_disaster_index (self, index):
1195 Mimic the behavior of the other object iterators, just with the inputs
1196 supplied directly as *index*.
1199 class RawIndexIterator(object):
1200 def __init__(self, delta_tar, index):
1201 self.delta_tar = delta_tar
1211 def __enter__(self):
1213 Allows this iterator to be used with the "with" statement
1215 self.iter = self.index.__iter__ ()
1218 def __exit__(self, type, value, tb):
1220 Allows this iterator to be used with the "with" statement
1224 idxent = self.iter.__next__ ()
1227 return RawIndexIterator(self, index)
1229 def collate_iterators(self, it1, it2):
1231 Collate two iterators, so that it returns pairs of the items of each
1232 iterator (if the items are the same), or (None, elem2) or (elem1, None)
1233 when there's no match for the items in the other iterator.
1235 It assumes that the items in both lists are ordered in the same way.
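# Sketch of the collation contract for two already-sorted path iterators:
#     it1 -> a, b, d      it2 -> b, c
# yields (a, None), (b, b), (None, c), (d, None), each triple carrying the
# current index line number l_no as its third element.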
1238 elem1, elem2 = None, None
1242 elem1, l_no = it1.__next__()
1243 except StopIteration:
1245 yield (None, elem2, l_no)
1247 if isinstance(elem2, tuple):
1249 yield (None, elem2, l_no)
1253 elem2 = it2.__next__()
1254 if isinstance(elem2, tuple):
1256 except StopIteration:
1258 yield (elem1, None, l_no)
1259 for elem1, l_no in it1:
1260 yield (elem1, None, l_no)
1263 index1 = self.unprefixed(elem1['path'])
1264 index2 = self.unprefixed(elem2['path'])
1265 i1, i2 = self.compare_indexes(index1, index2)
1267 yield1 = yield2 = None
1274 yield (yield1, yield2, l_no)
1276 def compare_indexes(self, index1, index2):
1278 Compare iterator indexes and return a tuple in the following form:
1279 if index1 < index2, returns (index1, None)
1280 if index1 == index2 returns (index1, index2)
1281 else: returns (None, index2)
1283 l1 = index1.split('/')
1284 l2 = index2.split('/')
1285 length = len(l2) - len(l1)
1288 return (index1, None)
1290 return (None, index2)
1292 for i1, i2 in zip(l1, l2):
1294 return (index1, None)
1296 return (None, index2)
1298 return (index1, index2)
1300 def list_backup(self, backup_tar_path, list_func=None):
1301 if not isinstance(backup_tar_path, str):
1302 raise Exception('Backup tar path must be a string')
1304 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1305 raise Exception('Source path "%s" does not exist or is not a '\
1306 'file' % backup_tar_path)
1308 if not os.access(backup_tar_path, os.R_OK):
1309 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1313 def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
1315 Handles the new volumes
1317 volume_name = deltarobj.volume_name_func(backup_path, True,
1318 volume_number, guess_name=True)
1319 volume_path = os.path.join(backup_path, volume_name)
1321 # we convert relative paths into absolute because CWD is changed
1322 if not os.path.isabs(volume_path):
1323 volume_path = os.path.join(cwd, volume_path)
1324 tarobj.open_volume(volume_path, encryption=encryption)
1326 if self.decryptor is None:
1327 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
1329 backup_path = os.path.dirname(backup_tar_path)
1330 if not os.path.isabs(backup_path):
1331 backup_path = os.path.join(cwd, backup_path)
1332 new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)
1334 tarobj = tarfile.TarFile.open(backup_tar_path,
1335 mode='r' + self.mode,
1336 format=tarfile.GNU_FORMAT,
1337 concat='#' in self.mode,
1338 encryption=self.decryptor,
1339 new_volume_handler=new_volume_handler,
1340 save_to_members=False,
1343 def filter(cls, list_func, tarinfo):
1344 if list_func is None:
1345 self.logger.info(tarinfo.path)
1349 filter = partial(filter, self, list_func)
1351 tarobj.extractall(filter=filter)
1354 def restore_backup(self, target_path, backup_indexes_paths=[],
1355 backup_tar_path=None, restore_callback=None,
1356 disaster=tarfile.TOLERANCE_STRICT, backup_index=None):
1361 - target_path: path to restore.
1362 - backup_indexes_paths: path to backup indexes, in descending date order.
1363 The indexes indicate the location of their respective backup volumes,
1364 and multiple indexes are needed to be able to restore diff backups.
1365 Note that this is an optional parameter: if not supplied, it will
1366 try to restore directly from backup_tar_path.
1367 - backup_tar_path: path to the backup tar file. Used as an alternative
1368 to backup_indexes_paths to restore directly from a tar file without
1369 using any file index. If it's a multivol tarfile, volume_name_func
1371 - restore_callback: callback function to be called during restore.
1372 This is passed to the helper and gets called for every file.
1374 NOTE: If you want to use an index to restore a backup, this function
1375 only supports doing so when the tarfile mode is either uncompressed or
1376 uses concat compression mode, because otherwise it would be very slow.
1378 NOTE: Indices are assumed to follow the same format as the index_mode
1379 specified in the constructor.
1381 Returns the list of files that could not be restored, if there were
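# Hedged usage sketch (paths are hypothetical): restore a diff backup on
# top of its full backup, newest index first:
#     failed = dtar.restore_backup('/srv/restore',
#         backup_indexes_paths=['/backup/bdiff-2014-10-02-1200.index.gz',
#                               '/backup/bfull-2014-10-01-1200.index.gz'])
#     # `failed` collects (path, exception) pairs for irrecoverable files.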
1384 # check/sanitize input
1385 if not isinstance(target_path, str):
1386 raise Exception('Target path must be a string')
1388 if not backup_indexes_paths and backup_tar_path is None:
1389 raise Exception("You have to either provide index paths or a tar path")
1391 if isinstance(backup_index, list):
1393 elif len(backup_indexes_paths) == 0:
1399 if not isinstance(backup_tar_path, str):
1400 raise Exception('Backup tar path must be a string')
1402 if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
1403 raise Exception('Source path "%s" does not exist or is not a '\
1404 'file' % backup_tar_path)
1406 if not os.access(backup_tar_path, os.R_OK):
1407 raise Exception('Source path "%s" is not readable' % backup_tar_path)
1409 if not isinstance(backup_indexes_paths, list):
1410 raise Exception('backup_indexes_paths must be a list')
1412 if self.mode.startswith(':') or self.mode.startswith('|'):
1413 raise Exception('Restore only supports either uncompressed tars'
1414 ' or concat compression when restoring from an index, and '
1415 'the open mode you provided is "%s"' % self.mode)
1417 for index in backup_indexes_paths:
1418 if not isinstance(index, str):
1419 raise Exception('indices must be strings')
1421 if not os.path.exists(index) or not os.path.isfile(index):
1422 raise Exception('Index path "%s" does not exist or is not a '\
1425 if not os.access(index, os.R_OK):
1426 raise Exception('Index path "%s" is not readable' % index)
1428 # try to create backup path if needed
1429 os.makedirs(target_path, exist_ok=True)
1431 # make backup_tar_path absolute so that iterate_tar_path works fine
1432 if backup_tar_path and not os.path.isabs(backup_tar_path):
1433 backup_tar_path = os.path.abspath(backup_tar_path)
1436 os.chdir(target_path)
1438 # setup for decrypting payload
1439 if self.decryptor is None:
1440 self.decryptor = self.initialize_encryption (CRYPTO_MODE_DECRYPT)
1443 index_it = self.iterate_tar_path(backup_tar_path)
1444 helper = RestoreHelper(self, cwd, backup_path=backup_tar_path,
1445 tarobj=index_it.tar_obj)
1446 elif mode == "diff":
1447 helper = RestoreHelper(self, cwd, backup_indexes_paths,
1450 # get iterator from newest index at _data[0]
1451 index1 = helper._data[0]["path"]
1452 index_it = self.iterate_index_path(index1)
1453 except tarfile.DecryptionError as exn:
1454 self.logger.error("failed to decrypt file [%s]: %s; is this an "
1455 "actual encrypted index file?"
1456 % (index1, str (exn)))
1457 return [(index1, exn)]
1458 except Exception as exn:
1460 self.logger.error("failed to read file [%s]: %s; is this an "
1461 "actual index file?" % (index1, str (exn)))
1462 return [(index1, exn)]
1463 elif mode == "disaster":
1464 index_it = self.iterate_disaster_index (backup_index)
1465 helper = RestoreHelper (self, cwd, backup_path=backup_tar_path,
1466 backup_index=backup_index,
1470 dir_it = self._recursive_walk_dir('.')
1471 dir_path_it = self.jsonize_path_iterator(dir_it)
1473 failed = [] # irrecoverable files
1475 # for each file to be restored, do:
1476 for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
1478 upath = dpath['path']
1479 op_type = dpath['type']
1481 upath = self.unprefixed(ipath['path'])
1482 op_type = ipath['type']
1485 if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH:
1488 # if types of the file mismatch, the file needs to be deleted
1490 if ipath is not None and dpath is not None and\
1491 dpath['type'] != ipath['type']:
1492 helper.delete(upath)
1494 # if file not found in dpath, we can directly restore from index
1496 # if the file doesn't exist and it needs to be deleted, it
1497 # means that work is already done
1498 if ipath['path'].startswith('delete://'):
1501 self.logger.debug("restore %s" % ipath['path'])
1502 helper.restore(ipath, l_no, restore_callback)
1503 except Exception as e:
1504 iipath = ipath.get ("path", "")
1505 self.logger.error("FAILED to restore: {} ({})"
1507 if disaster != tarfile.TOLERANCE_STRICT:
1508 failed.append ((iipath, e))
1511 # if both files are equal, we have nothing to restore
1512 if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True):
1515 # we have to restore the file, but first we need to delete the
1516 # current existing file.
1517 # we don't delete the file if it's a directory, because it might
1518 # just have changed mtime, so it's quite inefficient to remove
1521 if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'):
1522 helper.delete(upath)
1523 self.logger.debug("restore %s" % ipath['path'])
1525 helper.restore(ipath, l_no, restore_callback)
1526 except Exception as e:
1527 if disaster == tarfile.TOLERANCE_STRICT:
1529 failed.append ((ipath.get ("path", ""), e))
1532 # if the file is not in the index (so it comes from the target
1533 # directory) then we have to delete it
1535 self.logger.debug("delete %s" % upath)
1536 helper.delete(upath)
1538 helper.restore_directories_permissions()
1546 def recover_backup(self, target_path, backup_indexes_paths=[],
1547 restore_callback=None):
1549 Walk the index, extracting objects in disaster mode. Bad files are
1550 reported along with a reason.
1552 return self.restore_backup(target_path,
1553 backup_indexes_paths=backup_indexes_paths,
1554 disaster=tarfile.TOLERANCE_RECOVER)
1557 def rescue_backup(self, target_path, backup_tar_path,
1558 restore_callback=None):
1560 More aggressive “unfsck” mode: do not rely on the index data as the
1561 files may be corrupt; skim files for header-like information and
1562 attempt to retrieve the data.
1564 def gen_volume_name (nvol):
1565 return os.path.join (os.path.dirname (backup_tar_path),
1566 self.volume_name_func (backup_tar_path,
1570 backup_index = tarfile.gen_rescue_index (gen_volume_name,
1572 password=self.password,
1573 key=self.crypto_key)
1575 return self.restore_backup(target_path,
1576 backup_index=backup_index,
1577 backup_tar_path=backup_tar_path,
1578 disaster=tarfile.TOLERANCE_RESCUE)
1581 def _parse_json_line(self, f, l_no):
1583 Read a line from a file-like object and parse it as JSON.
1588 j = json.loads(l.decode('UTF-8'))
1589 except UnicodeDecodeError as e:
1590 if tuple (l [0:2]) == tarfile.GZ_MAGIC:
1592 ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])"
1593 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1596 ("error parsing line #%d as json: not a text file (%d B: [%s..])"
1597 % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \
1599 except ValueError as e:
1600 raise Exception("error parsing this json line "
1601 "(line number %d): %s" % (l_no, l))
1605 class RestoreHelper(object):
1607 Class used to help to restore files from indices
1610 # holds the dicts of data
1617 # list of directories to be restored. This is done as a last step, see
1618 # tarfile.extractall for details.
1621 _disaster = tarfile.TOLERANCE_STRICT
1623 def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
1624 backup_index=None, tarobj=None,
1625 disaster=tarfile.TOLERANCE_STRICT):
1627 Constructor opens the tars and initializes the data structures.
1631 - Index list must be provided in reverse order (newer first).
1632 - “newer first” apparently means that if there are n backups
1633 provided, the last full backup is at index n-1 and the most recent
1634 diff backup is at index 0.
1635 - Only the first, the second, and the last elements of
1636 ``index_list`` are relevant, others will not be accessed.
1637 - If no ``index_list`` is provided, both ``tarobj`` and
1638 ``backup_path`` must be passed.
1639 - If ``index_list`` is provided, the values of ``tarobj`` and
1640 ``backup_path`` are ignored.
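# Example ordering (hypothetical file names): when restoring a diff backup
# on top of its full backup, the caller would pass
#     index_list = ['bdiff-2014-10-02-1200.index.gz',
#                   'bfull-2014-10-01-1200.index.gz']
# i.e. the most recent diff first, the full backup last.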
1643 self._directories = []
1644 self._deltatar = deltatar
1646 self._password = deltatar.password
1647 self._crypto_key = deltatar.crypto_key
1648 self._decryptors = []
1649 self._disaster = disaster
1656 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1657 self.canchown = True
1659 self.canchown = False
1661 if isinstance(backup_index, list):
1662 decryptor = self._deltatar.decryptor
1664 [{ "curr_vol_no" : None
1668 , "path" : backup_path
1671 , "last_itelement" : None
1673 , "new_volume_handler" :
1674 partial(self.new_volume_handler,
1675 self._deltatar, self._cwd, True,
1676 os.path.dirname(backup_path), decryptor)
1677 , "decryptor" : decryptor
1679 elif index_list is not None:
1680 for index in index_list:
1681 is_full = index == index_list[-1]
1684 if self._password is not None:
1685 decryptor = crypto.Decrypt (password=self._password,
1686 key=self._crypto_key)
1688 # make paths absolute to avoid cwd problems
1689 if not os.path.isabs(index):
1690 index = os.path.normpath(os.path.join(cwd, index))
1700 last_itelement = None,
1702 new_volume_handler = partial(self.new_volume_handler,
1703 self._deltatar, self._cwd, is_full,
1704 os.path.dirname(index), decryptor),
1705 decryptor = decryptor
1707 self._data.append(s)
1709 # make paths absolute to avoid cwd problems
1710 if not os.path.isabs(backup_path):
1711 backup_path = os.path.normpath(os.path.join(cwd, backup_path))
1713 # update the new_volume_handler of tar_obj
1714 tarobj.new_volume_handler = partial(self.new_volume_handler,
1715 self._deltatar, self._cwd, True, os.path.dirname(backup_path),
1716 self._deltatar.decryptor)
1725 last_itelement = None,
1727 new_volume_handler = tarobj.new_volume_handler,
1728 decryptor = self._deltatar.decryptor
1730 self._data.append(s)
1735 Closes all open files
1737 for data in self._data:
1739 data['vol_fd'].close()
1740 data['vol_fd'] = None
1742 data['tarobj'].close()
1743 data['tarobj'] = None
1745 def delete(self, path):
1749 if not os.path.exists(path):
1752 # to preserve parent directory mtime, we save it
1753 parent_dir = os.path.dirname(path) or os.getcwd()
1754 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1756 if os.path.isdir(path) and not os.path.islink(path):
1761 # now we restore parent_directory mtime
1762 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1764 def restore(self, itpath, l_no, callback=None):
1766 Restore the path from the appropriate backup. Receives the current path
1767 from the newest (=first) index iterator. itpath must not be None.
1768 callback is a custom function that gets called for every file.
1770 NB: This function takes the attribute ``_data`` as input but will only
1771 ever use its first and, if available, second element. Anything else in
1772 ``._data[]`` will be ignored.
1774 path = itpath['path']
1776 # Calls the callback function
1780 if path.startswith('delete://'):
1781 # the file has previously been deleted already in restore_backup in
1782 # all cases so we just need to finish
1785 # get data from newest index (_data[0])
1786 data = self._data[0]
1787 upath = self._deltatar.unprefixed(path)
1789 # to preserve parent directory mtime, we save it
1790 parent_dir = os.path.dirname(upath) or os.getcwd()
1791 os.makedirs(parent_dir, exist_ok=True)
1792 parent_dir_mtime = int(os.stat(parent_dir).st_mtime)
1794 # if path is found in the newest index as to be snapshotted, deal with it
1796 if path.startswith('snapshot://'):
1797 self.restore_file(itpath, data, path, l_no, upath)
1799 # now we restore parent_directory mtime
1800 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1803 # we go from index to index, finding the path in the index, then finding
1804 # the index with the most recent snapshot of the file being restored
1806 # Right now we support diff backups only, not incremental backups.
1807 # As a result _data[0] is always the diff backup index
1808 # and _data[1] the full backup index.
1809 if len(self._data) == 2:
1810 data = self._data[1]
1811 d, l_no, dpath = self.find_path_in_index(data, upath)
1813 self._deltatar.logger.warning('Error restoring file %s from '
1814 'index, not found in index %s' % (path, data['path']))
1817 cur_path = d.get('path', '')
1818 if cur_path.startswith('delete://'):
1819 self._deltatar.logger.warning(('Strange thing happened, file '
1820 '%s was listed in first index but deleted by another '
1821 'one. Path was ignored and untouched.') % path)
1823 elif cur_path.startswith('snapshot://'):
1824 # this code path is reached when the file is unchanged
1825 # in the newest index and therefore of type 'list://'
1826 self.restore_file(d, data, path, l_no, dpath)
1828 # now we restore parent_directory mtime
1829 os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
1832 # error code path is reached when:
1833 # a) we have more than two indexes (unsupported atm)
1834 # b) both indexes contain a list:// entry (logic error)
1835 # c) we have just one index and it also contains list://
1836 self._deltatar.logger.warning(('Error restoring file %s from index, '
1837 'snapshot not found in any index') % path)
1839 def find_path_in_index(self, data, upath):
1840 # NOTE: we restart the iterator sometimes because the iterator can be
1841 # walked over completely multiple times, for example if one path is not
1842 # found in one index and we have to go to the next index.
1843 it = data['iterator']
1845 it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
1846 d, l_no = it.__next__()
1848 d = data['last_itelement']
1849 l_no = data['last_lno']
1852 dpath = self._deltatar.unprefixed(d.get('path', ''))
1854 data['last_itelement'] = d
1855 data['last_lno'] = l_no
1856 return d, l_no, dpath
1858 up, dp = self._deltatar.compare_indexes(upath, dpath)
1859 # any time upath should have appeared before current dpath, it means
1860 # upath is just not in this index and we should stop
1862 data['last_itelement'] = d
1863 data['last_lno'] = l_no
1867 d, l_no = it.__next__()
1868 except StopIteration:
1869 data['last_itelement'] = d
1870 data['last_lno'] = l_no
1873 def restore_directories_permissions(self):
1875 Restore directory permissions when everything has been restored
1882 self._directories.sort(key=operator.attrgetter('name'))
1883 self._directories.reverse()
1885 # Set correct owner, mtime and filemode on directories.
1886 for member in self._directories:
1887 dirpath = member.name
1889 os.chmod(dirpath, member.mode)
1890 os.utime(dirpath, (member.mtime, member.mtime))
1892 # We have to be root to do so.
1894 g = grp.getgrnam(member.gname)[2]
1898 u = pwd.getpwnam(member.uname)[2]
1902 if member.issym and hasattr(os, "lchown"):
1903 os.lchown(dirpath, u, g)
1905 os.chown(dirpath, u, g)
1906 except EnvironmentError:
1907 raise tarfile.ExtractError("could not change owner")
1909 except tarfile.ExtractError as e:
1910 self._deltatar.logger.warning('tarfile: %s' % e)
1913 def new_volume_handler(deltarobj, cwd, is_full, backup_path, encryption, tarobj, base_name, volume_number):
1915 Handles the new volumes
1917 volume_name = deltarobj.volume_name_func(backup_path, is_full,
1918 volume_number, guess_name=True)
1919 volume_path = os.path.join(backup_path, volume_name)
1921 # we convert relative paths into absolute because CWD is changed
1922 if not os.path.isabs(volume_path):
1923 volume_path = os.path.join(cwd, volume_path)
1924 tarobj.open_volume(volume_path, encryption=encryption)
1926 def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
1928 Restores a snapshot of a file from a specific backup
1930 op_type = file_data.get('type', -1)
1931 member = file_data.get('member', None)
1932 ismember = bool(member)
1934 # when member is set, then we can assume everything is right and we
1935 # just have to restore the path
1937 vol_no = file_data.get('volume', -1)
1939 if not isinstance(vol_no, int) or vol_no < 0:
1940 self._deltatar.logger.warning('unrecognized type to be restored: '
1941 '%s, line %d' % (op_type, l_no))
1943 # set up the volume that needs to be read. Only needed when member is
1945 if index_data['curr_vol_no'] != vol_no:
1946 index_data['curr_vol_no'] = vol_no
1947 backup_path = os.path.dirname(index_data['path'])
1948 vol_name = self._deltatar.volume_name_func(backup_path,
1949 index_data['is_full'], vol_no, guess_name=True)
1950 vol_path = os.path.join(backup_path, vol_name)
1951 if index_data['vol_fd']:
1952 index_data['vol_fd'].close()
1953 index_data['vol_fd'] = open(vol_path, 'rb')
1955 # force reopen of the tarobj because of new volume
1956 if index_data['tarobj']:
1957 index_data['tarobj'].close()
1958 index_data['tarobj'] = None
1960 # seek tarfile if needed
1961 offset = file_data.get('offset', -1)
1962 if index_data['tarobj']:
1963 if self._disaster == tarfile.TOLERANCE_RESCUE:
1964 # force a seek and reopen
1965 index_data['tarobj'].close()
1966 index_data['tarobj'] = None
1969 member = index_data['tarobj'].__iter__().__next__()
1970 except tarfile.DecryptionError:
1972 except tarfile.CompressionError:
1975 if not member or member.path != file_data['path']:
1976 # force a seek and reopen
1977 index_data['tarobj'].close()
1978 index_data['tarobj'] = None
1981 # open the tarfile if needed
1982 if not index_data['tarobj']:
1983 index_data['vol_fd'].seek(offset)
1984 index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
1985 fileobj=index_data['vol_fd'],
1986 format=tarfile.GNU_FORMAT,
1987 concat='#' in self._deltatar.mode,
1988 encryption=index_data["decryptor"],
1989 new_volume_handler=index_data['new_volume_handler'],
1990 save_to_members=False,
1991 tolerance=self._disaster)
1993 member = index_data['tarobj'].__iter__().__next__()
1995 member.path = unprefixed_path
1996 member.name = unprefixed_path
1998 if op_type == 'directory':
1999 self.add_member_dir(member)
2000 member = copy.copy(member)
2001 member.mode = 0o0700
2003 # if it's an existing directory, we then don't need to recreate it
2004 # just set the right permissions, mtime and that kind of stuff
2005 if os.path.exists(member.path):
2009 # set current volume number in tarobj, otherwise the extraction of the
2010 # file might fail when trying to extract a multivolume member
2011 index_data['tarobj'].volume_number = index_data['curr_vol_no']
2013 def ignore_symlink (member, *_args):
2014 self._deltatar.logger.warning("Ignoring symlink %s" % member.name)
2016 # finally, restore the file
2017 index_data['tarobj'].extract(member, symlink_cb=ignore_symlink)
2019 def add_member_dir(self, member):
2021 Add member dir to be restored at the end
2023 if not self.canchown:
2024 self._directories.append(DirItem(name=member.name, mode=member.mode,
2025 mtime=member.mtime))
2027 self._directories.append(DirItem(name=member.name, mode=member.mode,
2028 mtime=member.mtime, gname=member.gname, uname=member.uname,
2029 uid=member.uid, gid=member.gid, issym=member.issym()))
2031 class DirItem(object):
2032 def __init__(self, **kwargs):
2033 for k, v in kwargs.items():