#!/usr/bin/env python

# Copyright (C) 2013 Intra2net AG
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see http://www.gnu.org/licenses/.

# Author: Eduardo Robles Elvira

import logging
import datetime
import os
from functools import partial

from . import tarfile

try:
    _string_types = basestring  # Python 2: covers both str and unicode
except NameError:
    _string_types = str  # Python 3 has no basestring


class NullHandler(logging.Handler):
    '''
    Logging handler that silently discards every record. It is attached to
    the "deltatar.DeltaTar" logger below so that the logger always has at
    least one handler.
    '''
    def emit(self, record):
        pass
logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())


class DeltaTar(object):
    '''
    Backup class used to create backups
    '''

    # list of files to exclude in the backup creation or restore operation. It
    # can contain python regular expressions.
    excluded_files = []

    # list of files to include in the backup creation or restore operation. It
    # can contain python regular expressions. If empty, all files in the source
    # path will be backed up (when creating a backup) or all the files in the
    # backup will be restored (when restoring a backup), but if included_files
    # is set then only the files included in the list will be processed.
    included_files = []

    # custom filter of files to be backed up (or restored). Unused and unset
    # by default. The function receives a file path and must return a boolean.
    filter_func = None

    # mode in which the delta will be created (when creating a backup) or
    # opened (when restoring). Accepts the same modes as the tarfile library.
    mode = "r#gz"

    # used together with aes modes to encrypt and decrypt backups.
    password = None

    # logger used by this class (the shared "deltatar.DeltaTar" logger).
    logger = None

    # whether the index is encrypted or not. Only makes sense to set it as True
    # if mode includes aes128 or aes256.
    index_encrypted = None

    # current time for this backup. Used for file names and file creation checks
    current_time = None

    # valid tarfile modes and their corresponding default file extension
    __file_extensions_dict = {
        'r:': 'tar',
        'r:gz': 'tar.gz',
        'r:bz2': 'tar.bz2',
        'w': 'tar',
        'w:gz': 'tar.gz',
        'w:bz2': 'tar.bz2',

        'r|': 'tar',
        'r|gz': 'tar.gz',
        'r|bz2': 'tar.bz2',
        'w|': 'tar',
        'w|gz': 'tar.gz',
        'w|bz2': 'tar.bz2',

        'r#gz': 'tar.gz',
        'w#gz': 'tar.gz',

        'r#gz.aes128': 'tar.gz.aes128',
        'w#gz.aes128': 'tar.gz.aes128',
        'r#gz.aes256': 'tar.gz.aes256',
        'w#gz.aes256': 'tar.gz.aes256'
    }

    def __init__(self, excluded_files=[], included_files=[],
                 filter_func=None, mode="r#gz", password=None,
                 logger=None,
                 index_encrypted=True, index_name_func=None,
                 volume_name_func=None):
        '''
        Constructor. Configures the diff engine.

        Parameters:
        - excluded_files: list of files to exclude in the backup creation or
          restore operation. It can contain python regular expressions.

        - included_files: list of files to include in the backup creation or
          restore operation. It can contain python regular expressions. If
          empty, all files in the source path will be backed up (when creating a
          backup) or all the files in the backup will be restored (when
          restoring a backup), but if included_files is set then only the files
          included in the list will be processed.

        - filter_func: custom filter of files to be backed up (or restored).
          Unused and unset by default. The function receives a file path and
          must return a boolean.

        - mode: mode in which the delta will be created (when creating a backup)
          or opened (when restoring). Accepts the same modes as the tarfile
          library. Valid modes are:

           'r:'         open for reading exclusively uncompressed
           'r:gz'       open for reading with gzip compression
           'r:bz2'      open for reading with bzip2 compression
           'w'          open for writing uncompressed
           'w:gz'       open for writing with gzip compression
           'w:bz2'      open for writing with bzip2 compression

           'r|'         open an uncompressed stream of tar blocks for reading
           'r|gz'       open a gzip compressed stream of tar blocks
           'r|bz2'      open a bzip2 compressed stream of tar blocks
           'w|'         open an uncompressed stream for writing
           'w|gz'       open a gzip compressed stream for writing
           'w|bz2'      open a bzip2 compressed stream for writing

           'r#gz'       open a stream of gzip compressed tar blocks for reading
           'w#gz'       open a stream of gzip compressed tar blocks for writing

           'r#gz.aes128' open an aes128 encrypted stream of gzip compressed tar blocks for reading
           'w#gz.aes128' open an aes128 encrypted stream of gzip compressed tar blocks for writing
           'r#gz.aes256' open an aes256 encrypted stream of gzip compressed tar blocks for reading
           'w#gz.aes256' open an aes256 encrypted stream of gzip compressed tar blocks for writing

        - password: used together with aes modes to encrypt and decrypt backups.

        - logger: logging handler (for example a logging.StreamHandler) that
          is attached to the "deltatar.DeltaTar" logger. Optional.

        - index_encrypted: whether the index is encrypted or not. Only makes
          sense to set it as True if mode includes aes128 or aes256.

        - index_name_func: function that sets a custom name for the index file.
          It replaces the default index_name_func of this class, which receives
          whether it's a full backup and returns the name of the index file.
          Optional. NOTE(review): nothing in this module calls it yet, confirm
          the expected arguments once the index is implemented.

        - volume_name_func: function that defines the name of tar volumes. It
          receives the backup_path, if it's a full backup and the volume number,
          and must return the name for the corresponding volume name. Optional,
          DeltaTar has default names for tar volumes.

        Raises an Exception if the mode is not one of the valid modes.
        '''

        # validate the requested mode, not the class-level default
        if mode not in self.__file_extensions_dict:
            raise Exception('Unrecognized extension')

        # copy the lists so that instances never share the default list objects
        self.excluded_files = list(excluded_files or [])
        self.included_files = list(included_files or [])
        self.filter_func = filter_func
        self.logger = logging.getLogger('deltatar.DeltaTar')
        # the logger is shared by all instances: don't attach a handler twice
        if logger and logger not in self.logger.handlers:
            self.logger.addHandler(logger)
        self.mode = mode
        self.password = password
        self.index_encrypted = index_encrypted
        self.current_time = datetime.datetime.now()

        # custom naming functions shadow the methods of the same name; being
        # instance attributes they are called without self
        if index_name_func is not None:
            self.index_name_func = index_name_func

        if volume_name_func is not None:
            self.volume_name_func = volume_name_func

    def index_name_func(self, is_full):
        '''
        Default name for the index file. Receives whether it's a full backup
        and returns the name of the corresponding index file, built from a
        "bfull"/"bdiff" prefix, the current time of the backup and an extension
        that depends on the mode (and on index_encrypted for aes modes).
        '''
        prefix = "bfull" if is_full else "bdiff"
        date_str = self.current_time.strftime("%y-%m-%d-%H%M")
        extension = ''

        if 'gz' in self.mode:
            extension += ".gz"
        elif 'bz2' in self.mode:
            extension += ".bz2"

        if self.index_encrypted and 'aes128' in self.mode:
            extension += ".aes128"
        elif self.index_encrypted and 'aes256' in self.mode:
            extension += ".aes256"

        # the extension already carries its leading dot (or is empty), so no
        # dot is added here
        return "%s-%s-index%s" % (prefix, date_str, extension)

    def volume_name_func(self, backup_path, is_full, volume_number):
        '''
        function that defines the name of tar volumes. It receives the
        backup_path, if it's a full backup and the volume number, and must return
        the name for the corresponding volume name. Optional, DeltaTar has default
        names for tar volumes.

        The default implementation does not use backup_path and numbers the
        volumes starting at 001 (volume_number is zero based).
        '''
        prefix = "bfull" if is_full else "bdiff"
        date_str = self.current_time.strftime("%y-%m-%d-%H%M")
        extension = self.__file_extensions_dict[self.mode]

        return "%s-%s-%03d.%s" % (prefix, date_str, volume_number + 1, extension)

    def _recursive_walk_dir(self, source_path):
        '''
        Walk a directory recursively, yielding each file/directory. A
        directory is yielded before its contents. Entries that are not
        readable are logged and skipped.

        TODO: do filtering with self.included_files etc
        '''

        def walk_dir(dir_path):
            '''
            Walk a directory, yielding each file/directory
            '''
            for filename in os.listdir(dir_path):
                file_path = os.path.join(dir_path, filename)
                if not os.access(file_path, os.R_OK):
                    self.logger.warning(
                        'Error accessing possibly locked file %s', file_path)
                    continue
                yield file_path

        # stack of generators, one per directory currently being traversed
        diryield_stack = [walk_dir(source_path)]

        while diryield_stack:
            try:
                cur_path = next(diryield_stack[-1])
            except StopIteration:
                # this directory is exhausted, go back to its parent
                diryield_stack.pop()
                continue

            yield cur_path

            if os.path.isdir(cur_path):
                diryield_stack.append(walk_dir(cur_path))

    def create_full_backup(self, source_path, backup_path,
                           max_volume_size=None):
        '''
        Creates a full backup.

        Parameters:
        - source_path: source path to the directory to back up.
        - backup_path: path where the back up will be stored. Backup path will
          be created if not existent.
        - max_volume_size: maximum volume size. Used to split the backup in
          volumes. Optional (won't split in volumes by default).

        Raises an Exception if the paths are not strings, if the source path is
        not a readable directory, if the backup path is not a writeable
        directory or if the mode is not recognized.
        '''
        # check input
        if not isinstance(source_path, _string_types):
            raise Exception('Source path must be a string')

        if not isinstance(backup_path, _string_types):
            raise Exception('Backup path must be a string')

        # isdir() is also False when the path does not exist
        if not os.path.isdir(source_path):
            raise Exception('Source path "%s" does not exist or is not a '\
                            'directory' % source_path)

        if not os.access(source_path, os.R_OK):
            raise Exception('Source path "%s" is not readable' % source_path)

        # try to create backup path if needed
        if not os.path.exists(backup_path):
            os.makedirs(backup_path)

        if not os.path.isdir(backup_path):
            raise Exception('Backup path "%s" is not a directory' % backup_path)

        if not os.access(backup_path, os.W_OK):
            raise Exception('Backup path "%s" is not writeable' % backup_path)

        # remove the trailing slash, but never turn "/" into an empty path
        if len(source_path) > 1 and source_path.endswith('/'):
            source_path = source_path[:-1]

        if len(backup_path) > 1 and backup_path.endswith('/'):
            backup_path = backup_path[:-1]

        # update current time
        self.current_time = datetime.datetime.now()

        # self.mode may have been changed after construction
        if self.mode not in self.__file_extensions_dict:
            raise Exception('Unrecognized extension')

        # generate the first volume name
        vol_name = self.volume_name_func(backup_path, True, 0)
        tarfile_path = os.path.join(backup_path, vol_name)

        def new_volume_handler(deltarobj, tarobj, base_name, volume_number):
            '''
            Handles the new volumes
            '''
            volume_name = deltarobj.volume_name_func(backup_path, True,
                                                     volume_number)
            # volumes live next to the first one, inside backup_path
            volume_path = os.path.join(backup_path, volume_name)
            tarobj.open_volume(volume_path)
        new_volume_handler = partial(new_volume_handler, self)

        # start creating the tarfile
        tarobj = tarfile.TarFile.open(tarfile_path,
                              mode=self.mode,
                              format=tarfile.GNU_FORMAT,
                              concat_compression='#gz' in self.mode,
                              password=self.password,
                              max_volume_size=max_volume_size,
                              new_volume_handler=new_volume_handler)

        try:
            for path in self._recursive_walk_dir(source_path):
                # the walker already yields the contents of every directory,
                # so directories must not be added recursively again.
                # NOTE(review): assumes the bundled tarfile module keeps the
                # "recursive" parameter of the standard TarFile.add - confirm.
                tarobj.add(path, recursive=False)
        finally:
            # release the archive even if adding a file fails
            tarobj.close()

    def create_diff_backup(self, source_path, backup_path, previous_index_path,
                           max_volume_size=None):
        '''
        Creates a backup. Not implemented yet.

        Parameters:
        - source_path: source path to the directory to back up.
        - backup_path: path where the back up will be stored. Backup path will
          be created if not existent.
        - previous_index_path: index of the previous backup, needed to know
          which files changed since then.
        - max_volume_size: maximum volume size in megabytes (MB). Used to split
          the backup in volumes. Optional (won't split in volumes by default).
        '''
        pass

    def restore_backup(self, target_path, backup_indexes_paths=[],
                       backup_tar_path=None):
        '''
        Restores a backup. Not implemented yet.

        Parameters:
        - target_path: path to restore.
        - backup_indexes_paths: path to backup indexes, in descending date order.
          The indexes indicate the location of their respective backup volumes,
          and multiple indexes are needed to be able to restore diff backups.
          Note that this is an optional parameter: if not supplied, it will
          try to restore directly from backup_tar_path.
        - backup_tar_path: path to the backup tar file. Used as an alternative
          to backup_indexes_paths to restore directly from a tar file without
          using any file index. If it's a multivol tarfile, volume_name_func
          will be called.
        '''
        pass
class DeltaTarTest(BaseTest):
    """
    Test backups
    """
    def setUp(self):
        '''
        Create base test data: a small directory tree under source_dir and a
        console logging handler.
        '''
        os.makedirs('source_dir/test/test2')
        # value returned by create_file for each file, keyed by file path
        # (create_file comes from BaseTest; presumably a hash of the contents,
        # verify against BaseTest)
        self.hash = dict()
        self.hash["source_dir/big"] = self.create_file("source_dir/big", 50000)
        self.hash["source_dir/small"] = self.create_file("source_dir/small", 100)
        self.hash["source_dir/test/huge"] = self.create_file("source_dir/test/huge", 700000)

        self.consoleLogger = logging.StreamHandler()
        self.consoleLogger.setLevel(logging.DEBUG)

    def tearDown(self):
        '''
        Remove temporary files created by unit tests
        '''
        # local import: the import block of this file is not part of this block
        import shutil

        # like "rm -rf", missing directories are not an error, but no shell
        # is spawned
        for path in ("source_dir", "backup_dir"):
            shutil.rmtree(path, ignore_errors=True)

    def test_create_simple_full_backup(self):
        '''
        Creates a full backup without any filtering and checks that the
        backup directory has been created.
        '''
        deltatar = DeltaTar(mode="w", logger=self.consoleLogger)

        # create first backup
        deltatar.create_full_backup(
            source_path="source_dir",
            backup_path="backup_dir")

        assert os.path.exists("backup_dir")