--- /dev/null
+#!/usr/bin/env python
+
+# Copyright (C) 2013 Intra2net AG
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see
+# <http://www.gnu.org/licenses/lgpl-3.0.html>
+
+# Author: Eduardo Robles Elvira <edulix@wadobo.com>
+
+import logging
+import datetime
+import os
+from functools import partial
+
+from . import tarfile
+
+
class NullHandler(logging.Handler):
    '''
    Logging handler that silently discards every record it receives.
    '''

    def emit(self, record):
        '''
        Drop the record: nothing is formatted, written or forwarded.
        '''
        pass


# Attach a do-nothing handler to the library logger at import time, so the
# logger always has at least one handler (presumably to avoid "no handlers
# could be found" complaints when the application configures no logging).
logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())
+
class DeltaTar(object):
    '''
    Backup class used to create backups
    '''

    # list of files to exclude in the backup creation or restore operation. It
    # can contain python regular expressions.
    excluded_files = []

    # list of files to include in the backup creation or restore operation. It
    # can contain python regular expressions. If empty, all files in the source
    # path will be backed up (when creating a backup) or all the files in the
    # backup will be restored (when restoring a backup), but if included_files
    # is set then only the files included in the list will be processed.
    included_files = []

    # custom filter of files to be backed up (or restored). Unused and unset
    # by default. The function receives a file path and must return a boolean.
    filter_func = None

    # mode in which the delta will be created (when creating a backup) or
    # opened (when restoring). Accepts the same modes as the tarfile library.
    mode = "r#gz"

    # used together with aes modes to encrypt and decrypt backups.
    password = None

    # python logger object.
    logger = None

    # whether the index is encrypted or not. Only makes sense to set it as True
    # if mode includes aes128 or aes256.
    index_encrypted = None

    # current time for this backup. Used for file names and file creation checks
    current_time = None

    # valid tarfile modes and their corresponding default file extension
    __file_extensions_dict = {
        'r:': 'tar',
        'r:gz': 'tar.gz',
        'r:bz2': 'tar.bz2',
        'w': 'tar',
        'w:gz': 'tar.gz',
        'w:bz2': 'tar.bz2',

        'r|': 'tar',
        'r|gz': 'tar.gz',
        'r|bz2': 'tar.bz2',
        'w|': 'tar',
        'w|gz': 'tar.gz',
        'w|bz2': 'tar.bz2',

        'r#gz': 'tar.gz',
        'w#gz': 'tar.gz',

        'r#gz.aes128': 'tar.gz.aes128',
        'w#gz.aes128': 'tar.gz.aes128',
        'r#gz.aes256': 'tar.gz.aes256',
        'w#gz.aes256': 'tar.gz.aes256'
    }

    def __init__(self, excluded_files=None, included_files=None,
                 filter_func=None, mode="r#gz", password=None,
                 logger=None,
                 index_encrypted=True, index_name_func=None,
                 volume_name_func=None):
        '''
        Constructor. Configures the diff engine.

        Parameters:
        - excluded_files: list of files to exclude in the backup creation or
          restore operation. It can contain python regular expressions.
          Defaults to a new empty list.

        - included_files: list of files to include in the backup creation or
          restore operation. It can contain python regular expressions. If
          empty, all files in the source path will be backed up (when creating
          a backup) or all the files in the backup will be restored (when
          restoring a backup), but if included_files is set then only the
          files included in the list will be processed. Defaults to a new
          empty list.

        - filter_func: custom filter of files to be backed up (or restored).
          Unused and unset by default. The function receives a file path and
          must return a boolean.

        - mode: mode in which the delta will be created (when creating a
          backup) or opened (when restoring). Accepts the same modes as the
          tarfile library. Valid modes are:

           'r:'         open for reading exclusively uncompressed
           'r:gz'       open for reading with gzip compression
           'r:bz2'      open for reading with bzip2 compression
           'w'          open for writing without compression
           'w:gz'       open for writing with gzip compression
           'w:bz2'      open for writing with bzip2 compression

           'r|'         open an uncompressed stream of tar blocks for reading
           'r|gz'       open a gzip compressed stream of tar blocks
           'r|bz2'      open a bzip2 compressed stream of tar blocks
           'w|'         open an uncompressed stream for writing
           'w|gz'       open a gzip compressed stream for writing
           'w|bz2'      open a bzip2 compressed stream for writing

           'r#gz'       open a stream of gzip compressed tar blocks for reading
           'w#gz'       open a stream of gzip compressed tar blocks for writing

           'r#gz.aes128' open an aes128 encrypted stream of gzip compressed
                         tar blocks for reading
           'w#gz.aes128' open an aes128 encrypted stream of gzip compressed
                         tar blocks for writing
           'r#gz.aes256' open an aes256 encrypted stream of gzip compressed
                         tar blocks for reading
           'w#gz.aes256' open an aes256 encrypted stream of gzip compressed
                         tar blocks for writing

        - password: used together with aes modes to encrypt and decrypt
          backups.

        - logger: optional object that is attached, via addHandler(), to the
          'deltatar.DeltaTar' logger used by this class.

        - index_encrypted: whether the index is encrypted or not. Only makes
          sense to set it as True if mode includes aes128 or aes256.

        - index_name_func: function that sets a custom name for the index
          file. It replaces the default index_name_func method on this
          instance, so it is called with the same arguments as that method
          minus self (is_full) and must return the name of the index file.
          Optional, DeltaTar gives index files a name by default.

        - volume_name_func: function that defines the name of tar volumes. It
          receives the backup_path, if it's a full backup and the volume
          number, and must return the name for the corresponding volume.
          Optional, DeltaTar has default names for tar volumes.

        Raises:
        - Exception: if mode is not one of the valid modes listed above.
        '''
        # Validate the mode that was actually passed in (not the class-level
        # default), and do it before touching any shared state.
        if mode not in self.__file_extensions_dict:
            raise Exception('Unrecognized extension')

        # None stands for "no list given"; build a fresh list per instance so
        # instances never share (and mutate) a single default list.
        self.excluded_files = [] if excluded_files is None else excluded_files
        self.included_files = [] if included_files is None else included_files
        self.filter_func = filter_func
        self.logger = logging.getLogger('deltatar.DeltaTar')
        # The logger is shared by every instance: attach the given object only
        # once, otherwise each new DeltaTar would duplicate its output.
        if logger is not None and logger not in self.logger.handlers:
            self.logger.addHandler(logger)
        self.mode = mode
        self.password = password
        self.index_encrypted = index_encrypted
        self.current_time = datetime.datetime.now()

        # Custom naming functions shadow the default methods on this instance
        # only; they are plain callables and do not receive self.
        if index_name_func is not None:
            self.index_name_func = index_name_func

        if volume_name_func is not None:
            self.volume_name_func = volume_name_func

    def index_name_func(self, is_full):
        '''
        Default name of the index file.

        Parameters:
        - is_full: True for a full backup ("bfull" prefix), False for a diff
          backup ("bdiff" prefix).

        Returns a name of the form "<prefix>-<date>-index<extension>", where
        the date comes from self.current_time and the extension reflects the
        compression of self.mode and, if the index is encrypted, the aes
        variant (for example "bfull-13-07-05-1430-index.gz.aes128").
        '''
        prefix = "bfull" if is_full else "bdiff"
        date_str = self.current_time.strftime("%y-%m-%d-%H%M")
        extension = ''

        if 'gz' in self.mode:
            extension += ".gz"
        elif 'bz2' in self.mode:
            extension += ".bz2"

        if self.index_encrypted and 'aes128' in self.mode:
            extension += ".aes128"
        elif self.index_encrypted and 'aes256' in self.mode:
            extension += ".aes256"

        # Every extension part already starts with a dot, so none is added
        # here (otherwise the name ends in "index..gz" or a dangling "index.").
        return "%s-%s-index%s" % (prefix, date_str, extension)

    def volume_name_func(self, backup_path, is_full, volume_number):
        '''
        Default name of a tar volume.

        Parameters:
        - backup_path: path of the backup directory. Not used by the default
          naming scheme; available to custom implementations.
        - is_full: True for a full backup ("bfull" prefix), False for a diff
          backup ("bdiff" prefix).
        - volume_number: zero-based volume number; it is shown one-based and
          zero-padded to three digits in the name.

        Returns a name of the form "<prefix>-<date>-<NNN>.<extension>", where
        the extension is the default one for self.mode.
        '''
        prefix = "bfull" if is_full else "bdiff"
        date_str = self.current_time.strftime("%y-%m-%d-%H%M")
        extension = self.__file_extensions_dict[self.mode]

        return "%s-%s-%03d.%s" % (prefix, date_str, volume_number + 1,
                                  extension)

    def _recursive_walk_dir(self, source_path):
        '''
        Walk a directory recursively, yielding the path of each file and
        directory below source_path (source_path itself is not yielded).

        A directory is yielded before its contents. Entries that are not
        readable are logged and skipped. Symbolic links are yielded but never
        descended into, so a link cycle cannot make the walk loop.

        TODO: do filtering with self.included_files etc
        '''

        def walk_dir(dir_path):
            '''
            Yield the path of each readable entry directly inside dir_path.
            '''
            for filename in os.listdir(dir_path):
                file_path = os.path.join(dir_path, filename)
                if not os.access(file_path, os.R_OK):
                    self.logger.warning(
                        'Error accessing possibly locked file %s', file_path)
                    continue
                yield file_path

        # Explicit stack of directory iterators: depth-first traversal without
        # recursion, so very deep trees cannot hit the recursion limit.
        pending = [walk_dir(source_path)]

        while pending:
            try:
                cur_path = next(pending[-1])
            except StopIteration:
                # innermost directory exhausted, resume its parent
                pending.pop()
                continue

            yield cur_path

            if os.path.isdir(cur_path) and not os.path.islink(cur_path):
                pending.append(walk_dir(cur_path))

    def create_full_backup(self, source_path, backup_path,
                           max_volume_size=None):
        '''
        Creates a full backup.

        Parameters:
        - source_path: source path to the directory to back up.
        - backup_path: path where the back up will be stored. Backup path will
          be created if not existent.
        - max_volume_size: maximum volume size. Used to split the backup in
          volumes. Optional (won't split in volumes by default).

        Raises:
        - Exception: if a path is not a string, the source is missing, not a
          directory or not readable, the backup path is not writeable, or
          self.mode is not a valid mode.
        '''
        # "basestring" only exists on Python 2; fall back to str elsewhere.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        # check input
        if not isinstance(source_path, string_types):
            raise Exception('Source path must be a string')

        if not isinstance(backup_path, string_types):
            raise Exception('Backup path must be a string')

        if not os.path.exists(source_path) or not os.path.isdir(source_path):
            raise Exception('Source path "%s" does not exist or is not a '
                            'directory' % source_path)

        if not os.access(source_path, os.R_OK):
            raise Exception('Source path "%s" is not readable' % source_path)

        # try to create backup path if needed
        if not os.path.exists(backup_path):
            os.makedirs(backup_path)

        if not os.access(backup_path, os.W_OK):
            raise Exception('Backup path "%s" is not writeable' % backup_path)

        # Drop one trailing slash, but never turn the root "/" into "".
        if len(source_path) > 1 and source_path.endswith('/'):
            source_path = source_path[:-1]

        if len(backup_path) > 1 and backup_path.endswith('/'):
            backup_path = backup_path[:-1]

        # update current time
        self.current_time = datetime.datetime.now()

        # self.mode may have been reassigned after construction
        # NOTE(review): the default mode is 'r#gz', a read mode; creating a
        # backup presumably needs one of the 'w...' modes - confirm.
        if self.mode not in self.__file_extensions_dict:
            raise Exception('Unrecognized extension')

        # generate the first volume name
        vol_name = self.volume_name_func(backup_path, True, 0)
        tarfile_path = os.path.join(backup_path, vol_name)

        def new_volume_handler(tarobj, base_name, volume_number):
            '''
            Handles the new volumes: names the next volume and opens it
            inside the backup directory.
            '''
            volume_name = self.volume_name_func(backup_path, True,
                                                volume_number)
            tarobj.open_volume(os.path.join(backup_path, volume_name))

        # start creating the tarfile
        tarobj = tarfile.TarFile.open(tarfile_path,
                                      mode=self.mode,
                                      format=tarfile.GNU_FORMAT,
                                      concat_compression='#gz' in self.mode,
                                      password=self.password,
                                      max_volume_size=max_volume_size,
                                      new_volume_handler=new_volume_handler)

        # Always close the archive, even when adding a member fails.
        try:
            for path in self._recursive_walk_dir(source_path):
                # NOTE(review): if this tarfile module keeps the standard
                # library default add(recursive=True), directories are added
                # with their contents here and then walked again, duplicating
                # members - confirm, and pass recursive=False if so.
                tarobj.add(path)
        finally:
            tarobj.close()

    def create_diff_backup(self, source_path, backup_path, previous_index_path,
                           max_volume_size=None):
        '''
        Creates a backup. Not implemented yet: currently does nothing.

        Parameters:
        - source_path: source path to the directory to back up.
        - backup_path: path where the back up will be stored. Backup path will
          be created if not existent.
        - previous_index_path: index of the previous backup, needed to know
          which files changed since then.
        - max_volume_size: maximum volume size in megabytes (MB). Used to split
          the backup in volumes. Optional (won't split in volumes by default).
        '''
        pass

    def restore_backup(self, target_path, backup_indexes_paths=None,
                       backup_tar_path=None):
        '''
        Restores a backup. Not implemented yet: currently does nothing.

        Parameters:
        - target_path: path to restore.
        - backup_indexes_paths: path to backup indexes, in descending date
          order. The indexes indicate the location of their respective backup
          volumes, and multiple indexes are needed to be able to restore diff
          backups. Note that this is an optional parameter: if not supplied,
          it will try to restore directly from backup_tar_path.
        - backup_tar_path: path to the backup tar file. Used as an alternative
          to backup_indexes_paths to restore directly from a tar file without
          using any file index. If it's a multivol tarfile, volume_name_func
          will be called.
        '''
        pass