# python logger object.
logger = None
- # whether the index is encrypted or not. Only makes sense to set it as True
- # if mode includes aes128 or aes256.
- index_encrypted = None
+ # specifies the index mode in the same format as @param mode, but without
+ # the ':', '|' or '#' at the beginning. It doesn't make sense to specify
+ # that the index is encrypted if no password is given in the constructor.
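+ # Valid values are the keys of __index_extensions_dict below: '',
+ # 'gz', 'bz2', 'gz.aes128' and 'gz.aes256'.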
+ index_mode = None
# current time for this backup. Used for file names and file creation checks
current_time = None
'#gz.aes256': '.gz.aes256'
}
+ # valid index modes and their corresponding default file extension
+ __index_extensions_dict = {
+ '': '',
+ 'gz': '.gz',
+ 'bz2': '.bz2',
+ 'gz.aes128': '.gz.aes128',
+ 'gz.aes256': '.gz.aes256'
+ }
+
# valid path prefixes
__path_prefix_list = [
u'snapshot://',
def __init__(self, excluded_files=[], included_files=[],
filter_func=None, mode="", password=None,
- logger=None,
- index_encrypted=False, index_name_func=None,
+ logger=None, index_mode=None, index_name_func=None,
volume_name_func=None):
'''
Constructor. Configures the diff engine.
- logger: python logger object. Optional.
- - index_encrypted: whether the index is encrypted or not. Only makes
- sense to set it as True if mode includes aes128 or aes256.
+ - index_mode: specifies the index mode in the same format as @param
+ mode, but without the ':', '|' or '#' at the beginning. It doesn't
+ make sense to specify that the index is encrypted if no password
+ is given in the constructor. This parameter is optional and, when
+ not provided, defaults to mimicking @param mode. Valid modes are:
+
+ '' open uncompressed
+ 'gz' open with gzip compression
+ 'bz2' open with bzip2 compression
+ 'gz.aes128' open an aes128 encrypted stream of gzip compressed blocks
+ 'gz.aes256' open an aes256 encrypted stream of gzip compressed blocks
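+
+ For example (illustrative values): mode='#gz.aes128' with no
+ index_mode given results in index_mode='gz.aes128', and mode=':bz2'
+ results in index_mode='bz2'.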
- index_name_func: function that sets a custom name for the index file.
This function receives the backup_path and if it's a full backup as
self.logger.addHandler(logger)
self.mode = mode
self.password = password
- self.index_encrypted = index_encrypted
+
+ # generate index_mode
+ if index_mode is None:
+ index_mode = ''
+ if 'gz.aes' in mode:
+ index_mode = mode[1:]
+ elif 'gz' in mode:
+ index_mode = "gz"
+ elif 'bz2' in mode:
+ index_mode = "bz2"
+ elif index_mode not in self.__index_extensions_dict:
+ raise Exception('Unrecognized index mode: "%s"' % index_mode)
+
+ self.index_mode = index_mode
self.current_time = datetime.datetime.now()
if index_name_func is not None:
'''
prefix = "bfull" if is_full else "bdiff"
date_str = self.current_time.strftime("%y-%m-%d-%H%M")
- extension = ''
-
- if self.index_encrypted and 'aes' in self.mode:
- extension = self.__file_extensions_dict[self.mode]
+ extension = self.__index_extensions_dict[self.index_mode]
return "%s-%s.index%s" % (prefix, date_str, extension)
return path[len(prefix):]
return path
+ def open_index(self, path, mode='r'):
+ '''
+ Given the specified configuration, opens the index for reading or
+ writing. It transparently handles any encryption and/or compression,
+ returning a file-like object ready to use.
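+
+ For instance (hypothetical file name): with index_mode 'gz', a call
+ like open_index('bfull-15-01-01-1200.index.gz', 'w') returns a
+ stream that gzip-compresses everything written to it.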
+ '''
+
+ if self.index_mode.startswith('gz'):
+ comptype = 'gz'
+ elif self.index_mode.startswith('bz2'):
+ comptype = 'bz2'
+ else:
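+ # tarfile._Stream treats comptype 'tar' as a plain passthrough
+ # with no compression layer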
+ comptype = 'tar'
+
+ enctype = ''
+ if 'aes' in self.index_mode:
+ enctype = 'aes'
+
+ key_length = 128
+ if 'aes256' in self.index_mode:
+ key_length = 256
+
+ return tarfile._Stream(name=path, mode=mode, comptype=comptype,
+ bufsize=tarfile.RECORDSIZE, fileobj=None,
+ enctype=enctype, password=self.password,
+ key_length=key_length)
+
+
def create_full_backup(self, source_path, backup_path,
max_volume_size=None):
'''
# init index
index_name = self.index_name_func(True)
index_path = os.path.join(backup_path, index_name)
- # TODO: encrypt or compress it if necessary
- index_fd = open(index_path, 'w')
+ index_fd = self.open_index(index_path, 'w')
cwd = os.getcwd()
which files changed since then.
- max_volume_size: maximum volume size in megabytes (MB). Used to split
the backup in volumes. Optional (won't split in volumes by default).
+
+ NOTE: the previous index is assumed to follow exactly the same format
+ as the index_mode configured in the constructor.
'''
# check/sanitize input
if not isinstance(source_path, basestring):
# init index
index_name = self.index_name_func(True)
index_path = os.path.join(backup_path, index_name)
- # TODO: encrypt or compress it if necessary
- index_fd = open(index_path, 'w')
+ index_fd = self.open_index(index_path, 'w')
cwd = os.getcwd()
def iterate_index_path(self, index_path):
# open
- f = open(index_path, 'r')
+ f = self.open_index(index_path, 'r')
# check index header
j, l_no = self._parse_json_line(f, 0)
if j.get("type", '') != 'python-delta-tar-index' or\
using any file index. If it's a multivol tarfile, volume_name_func
will be called.
- Note: If you want to use an index to restore a backup, this function
+ NOTE: If you want to use an index to restore a backup, this function
only supports to do so when the tarfile mode is either uncompressed or
uses concat compress mode, because otherwise it would be very slow.
+
+ NOTE: Indices are assumed to follow the same format as the index_mode
+ specified in the constructor.
'''
# check/sanitize input
if not isinstance(target_path, basestring):
self.key_length = key_length
self.password = password
self.last_block_offset = 0L
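+ # data already read and decoded but not yet returned to the caller;
+ # readline() also pushes back here whatever follows the newline it
+ # returns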
+ self.dbuf = ""
if comptype == "gz":
try:
"""Initialize for reading a gzip compressed fileobj.
"""
self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
- self.dbuf = ""
# if aes, we decrypt before the compression
if self.enctype == 'aes':
self.pos += len(buf)
return buf
+ def readline(self):
+ """Reads just one line, newline character included
+ """
+ # fast path: a complete line may already be buffered
+ if '\n' in self.dbuf:
+ pos = self.dbuf.index('\n') + 1
+ line = self.dbuf[:pos]
+ self.dbuf = self.dbuf[pos:]
+ return line
+
+ buf = []
+ while True:
+ chunk = self._read(self.bufsize)
+
+ # end of stream: return whatever is left, newline or not
+ if not chunk:
+ return ''.join(buf)
+
+ buf.append(chunk)
+ if '\n' in chunk:
+ dbuf = ''.join(buf)
+ pos = dbuf.index('\n') + 1
+ # prepend the tail to whatever _read() left buffered,
+ # instead of overwriting (and losing) it
+ self.dbuf = dbuf[pos:] + self.dbuf
+ return dbuf[:pos]
+
def _read(self, size):
"""Return size bytes from the stream.
"""
- if self.comptype == "tar":
- return self.__read(size)
-
c = len(self.dbuf)
t = [self.dbuf]
while c < size:
buf = self.__read(self.bufsize)
if not buf:
break
- try:
- buf = self.cmp.decompress(buf)
- except IOError:
- raise ReadError("invalid compressed data")
- if self.comptype == "gz" and hasattr(self, "crc"):
- self.crc = self.zlib.crc32(buf, self.crc) & 0xffffffffL
- if self.concat_stream and len(self.cmp.unused_data) != 0:
- self.buf = self.cmp.unused_data + self.buf
- self.close(close_fileobj=False)
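+ # raw 'tar' data now flows through the same dbuf buffering as the
+ # compressed modes; only the decompression step is skipped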
+ if self.comptype != "tar":
try:
- self._init_read_gz()
- except:
- # happens at the end of the file
- pass
- self.crc = self.zlib.crc32("") & 0xffffffffL
- self.closed = False
+ buf = self.cmp.decompress(buf)
+ except IOError:
+ raise ReadError("invalid compressed data")
+
+ if self.comptype == "gz" and hasattr(self, "crc"):
+ self.crc = self.zlib.crc32(buf, self.crc) & 0xffffffffL
+ if self.concat_stream and len(self.cmp.unused_data) != 0:
+ self.buf = self.cmp.unused_data + self.buf
+ self.close(close_fileobj=False)
+ try:
+ self._init_read_gz()
+ except:
+ # happens at the end of the file
+ pass
+ self.crc = self.zlib.crc32("") & 0xffffffffL
+ self.closed = False
t.append(buf)
c += len(buf)
t = "".join(t)