return match
-
def _recursive_walk_dir(self, source_path):
'''
Walk a directory recursively, yielding each file/directory
'''
def walk_dir(dir_path):
'''
Walk a directory, yielding each file/directory
'''
- for filename in os.listdir(dir_path):
+ for filename in sorted(os.listdir(dir_path)):
file_path = os.path.join(dir_path, filename)
is_dir = os.path.isdir(file_path)
if self.filter_path(file_path, source_path, is_dir) == NO_MATCH:
continue
yield file_path
- diryield_stack = [walk_dir(source_path)]
- delayed_path_stack = []
-
- while diryield_stack:
- try:
- cur_path = diryield_stack[-1].next()
- is_dir = os.path.isdir(cur_path)
- status = self.filter_path(cur_path, source_path, is_dir)
- except StopIteration:
- diryield_stack.pop()
- if delayed_path_stack:
- delayed_path_stack.pop()
- continue
+ queue = [source_path]
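+ # FIFO queue: combined with the sorted listing in walk_dir, this yields
+ # paths in a stable breadth-first order, which collate_iterators later
+ # relies on when matching two traversals of the same tree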
- if status == MATCH:
- if delayed_path_stack:
- for delayed_path in delayed_path_stack:
- is_dir = os.path.isdir(delayed_path)
- if self.filter_path(delayed_path, source_path, is_dir) == NO_MATCH:
- continue
- yield delayed_path
- del delayed_path_stack[:]
+ while queue:
+ cur_path = queue.pop(0)
- yield cur_path
+ for child in walk_dir(cur_path):
+ is_dir = os.path.isdir(child)
+ status = self.filter_path(child, source_path, is_dir)
- if os.path.isdir(cur_path):
- diryield_stack.append(walk_dir(cur_path))
+ if status == MATCH:
+ yield child
- elif status == PARENT_MATCH:
- delayed_path_stack.append(cur_path)
- diryield_stack.append(walk_dir(cur_path))
+ if is_dir and status in (MATCH, PARENT_MATCH):
+ queue.append(child)
def _stat_dict(self, path):
'''
ptype = None
if stat.S_ISDIR(mode):
- ptype = 'directory'
+ ptype = u'directory'
elif stat.S_ISREG(mode):
- ptype = 'file'
+ ptype = u'file'
elif stat.S_ISLNK(mode):
- ptype = 'link'
+ ptype = u'link'
return {
- 'type': ptype,
- 'path': path,
- 'mode': mode,
- 'mtime': stinfo.st_mtime,
- 'ctime': stinfo.st_ctime,
- 'uid': stinfo.st_uid,
- 'gid': stinfo.st_gid,
- 'inode': stinfo.st_ino,
- 'size': stinfo.st_size
+ u'type': ptype,
+ u'path': unicode(path),
+ u'mode': mode,
+ u'mtime': stinfo.st_mtime,
+ u'ctime': stinfo.st_ctime,
+ u'uid': stinfo.st_uid,
+ u'gid': stinfo.st_gid,
+ u'inode': stinfo.st_ino,
+ u'size': stinfo.st_size
}
+ def _equal_stat_dicts(self, d1, d2):
+ '''
+ Return True if the two dicts are equal in all the stat keys
+ '''
+ keys = [u'gid', u'type', u'mode', u'mtime', u'path', u'size', u'inode',
+ u'ctime', u'uid']
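+ # different fallbacks (-1 vs -2) make a key that is missing from
+ # either dict always count as a mismatch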
+ for key in keys:
+ if d1.get(key, -1) != d2.get(key, -2):
+ return False
+ return True
+
def create_full_backup(self, source_path, backup_path,
max_volume_size=None):
'''
raise Exception('Source path "%s" does not exist or is not a '\
'directory' % source_path)
- if max_volume_size != None and not isinstance(max_volume_size, int):
- raise Exception('max_volume_size must be an integer')
+ if max_volume_size != None and (not isinstance(max_volume_size, int) or\
+ max_volume_size < 1):
+ raise Exception('max_volume_size must be a positive integer')
if max_volume_size != None:
max_volume_size = max_volume_size*1024*1024
- max_volume_size: maximum volume size in megabytes (MB). Used to split
the backup in volumes. Optional (won't split in volumes by default).
'''
- pass
+ # check/sanitize input
+ if not isinstance(source_path, basestring):
+ raise Exception('Source path must be a string')
+
+ if not isinstance(backup_path, basestring):
+ raise Exception('Backup path must be a string')
+
+ if not os.path.exists(source_path) or not os.path.isdir(source_path):
+ raise Exception('Source path "%s" does not exist or is not a '\
+ 'directory' % source_path)
+
+ if not os.access(source_path, os.R_OK):
+ raise Exception('Source path "%s" is not readable' % source_path)
+
+ if max_volume_size != None and (not isinstance(max_volume_size, int) or\
+ max_volume_size < 1):
+ raise Exception('max_volume_size must be a positive integer')
+ if max_volume_size != None:
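+ # convert the limit from megabytes to bytes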
+ max_volume_size = max_volume_size*1024*1024
+
+ if not isinstance(previous_index_path, basestring):
+ raise Exception('previous_index_path must be a string')
+
+ if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
+ raise Exception('Index path "%s" does not exist or is not a '\
+ 'file' % previous_index_path)
+
+ if not os.access(previous_index_path, os.R_OK):
+ raise Exception('Index path "%s" is not readable' % previous_index_path)
+
+ # try to create backup path if needed
+ if not os.path.exists(backup_path):
+ os.makedirs(backup_path)
+
+ if not os.access(backup_path, os.W_OK):
+ raise Exception('Backup path "%s" is not writeable' % backup_path)
+
+ if source_path.endswith('/'):
+ source_path = source_path[:-1]
+
+ if backup_path.endswith('/'):
+ backup_path = backup_path[:-1]
+
+ # update current time
+ self.current_time = datetime.datetime.now()
+
+ if self.mode not in self.__file_extensions_dict:
+ raise Exception('Unrecognized extension')
+
+ # some initialization
+ self.vol_no = 0
+
+ # generate the first volume name
+ vol_name = self.volume_name_func(backup_path, True, 0)
+ tarfile_path = os.path.join(backup_path, vol_name)
+
+ # init index
+ index_name = self.index_name_func(True)
+ index_path = os.path.join(backup_path, index_name)
+ # TODO: encrypt or compress it if necessary
+ index_fd = open(index_path, 'w')
+
+ cwd = os.getcwd()
+
+ def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
+ '''
+ Handles the new volumes
+ '''
+ volume_name = deltarobj.volume_name_func(backup_path, True, volume_number)
+ volume_path = os.path.join(backup_path, volume_name)
+ deltarobj.vol_no = volume_number
+
+ # we convert relative paths into absolute because CWD is changed
+ if not os.path.isabs(volume_path):
+ volume_path = os.path.join(cwd, volume_path)
+
+ tarobj.open_volume(volume_path)
+
+ # wraps some args from context into the handler
+ new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
+
+ index_fd.write('{"type": "python-delta-tar-index", "version": 1 }\n')
+
+ s = '{"type": "BEGIN-FILE-LIST"}\n'
+ # keep a running checksum and write the line into the index
+ crc = binascii.crc32(s)
+ index_fd.write(s)
+
+ # start creating the tarfile
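+ # (concat_compression, password, max_volume_size and new_volume_handler
+ # look like extensions provided by this project's modified tarfile
+ # module, not arguments of the stdlib tarfile)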
+ tarobj = tarfile.TarFile.open(tarfile_path,
+ mode='w' + self.mode,
+ format=tarfile.GNU_FORMAT,
+ concat_compression='#gz' in self.mode,
+ password=self.password,
+ max_volume_size=max_volume_size,
+ new_volume_handler=new_volume_handler)
+
+ os.chdir(source_path)
+
+ # for each file to be in the backup, do:
+ for path in self._recursive_walk_dir('.'):
+ pass
+
+ def iterate_index_path(self, index_path):
+ # open
+ f = open(index_path, 'r')
+ # check index header
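+ # the first line must be the header written by create_full_backup, i.e.
+ # {"type": "python-delta-tar-index", "version": 1}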
+ j, l_no = self._parse_json_line(f, -1)
+ if j.get("type", '') != 'python-delta-tar-index' or\
+ j.get('version', -1) != 1:
+ raise Exception("invalid index file format: %s" % json.dumps(j))
+
+ # find BEGIN-FILE-LIST, ignore other headers
+ while True:
+ j, l_no = self._parse_json_line(f, -1)
+ if j.get('type', '') == 'BEGIN-FILE-LIST':
+ break
+
+ # read each file in the index and process it to do the restore
+ while True:
+ try:
+ j, l_no = self._parse_json_line(f, -1)
+ except Exception:
+ f.close()
+ raise
+
+ op_type = j.get('type', '')
+
+ # when we detect the end of the list, break the loop
+ if op_type == 'END-FILE-LIST':
+ f.close()
+ break
+
+ # check input
+ if op_type not in ['directory', 'file', 'link']:
+ self.logger.warn('unrecognized type to be '
+ 'restored: %s, line %d' % (op_type, l_no))
+ continue
+
+ yield j, l_no
+
+ def jsonize_path_iterator(self, path_iter):
+ '''
+ Converts the paths yielded by an iterator into stat dicts that can be
+ serialized as json index lines.
+ '''
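+ # e.g. the path './a' becomes a dict of its stat values, roughly
+ # {u'path': u'./a', u'type': u'file', u'size': ..., ...} (see _stat_dict)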
+ while True:
+ try:
+ path = path_iter.next()
+ yield self._stat_dict(path)
+ except StopIteration:
+ break
+
+ def collate_iterators(self, it1, it2):
+ '''
+ Collate two iterators, yielding pairs of items when they match, or
+ (elem1, None) / (None, elem2) when an item has no counterpart in the
+ other iterator.
+
+ It assumes that the items in both iterators are sorted in the same way.
+ '''
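+ # e.g. collating index entries for [u'./a', u'./c'] with stat dicts
+ # for [u'./a', u'./b', u'./c'] yields (a, a), (None, b), (c, c)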
+ elem1, elem2 = None, None
+ while True:
+ if not elem1:
+ try:
+ elem1 = it1.next()
+ if isinstance(elem1, tuple):
+ elem1 = elem1[0]
+ except StopIteration:
+ if elem2:
+ yield (None, elem2)
+ for elem2 in it2:
+ yield (None, elem2)
+ break
+ index1 = elem1['path']
+ if not elem2:
+ try:
+ elem2 = it2.next()
+ if isinstance(elem2, tuple):
+ elem2 = elem2[0]
+ except StopIteration:
+ if elem1:
+ yield (elem1, None)
+ for elem1 in it1:
+ if isinstance(elem1, tuple):
+ elem1 = elem1[0]
+ yield (elem1, None)
+ break
+ index2 = elem2['path']
+
+ if index1 < index2:
+ yield (elem1, None)
+ elem1 = None
+ elif index1 == index2:
+ yield (elem1, elem2)
+ elem1, elem2 = None, None
+ else:
+ # index2 is less
+ yield (None, elem2)
+ elem2 = None
def restore_backup(self, target_path, backup_indexes_paths=[],
backup_tar_path=None):
backup_path = os.path.dirname(backup_index_path)
new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
- # open
- f = open(backup_index_path, 'r')
-
- # check index header
- j, l_no = self._parse_json_line(f, -1)
- if j.get("type", '') != 'python-delta-tar-index' or\
- j.get('version', -1) != 1:
- raise Exception("invalid index file format: %s" % json.dumps(j))
-
- # find BEGIN-FILE-LIST, ignore other headers
- while True:
- j, l_no = self._parse_json_line(f, -1)
- if j.get('type', '') == 'BEGIN-FILE-LIST':
- break
-
+ # some initialization:
# current volume number
curr_vol_no = None
offset = -1
tarobj = None
- # read each file in the index and process it to do the retore
- while True:
- j, l_no = self._parse_json_line(f, -1)
+ # iterate through the index entries
+ for j, l_no in self.iterate_index_path(backup_index_path):
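+ # iterate_index_path has already validated the index header and
+ # skipped entries with an unrecognized type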
op_type = j.get('type', '')
-
- # when we detect the end of the list, break the loop
- if op_type == 'END-FILE-LIST':
- break
-
- # check input
- if op_type not in ['directory', 'file', 'link']:
- self.logger.warn('unrecognized type to be '
- 'restored: %s, line %d' % (op_type, l_no))
- continue
-
- # filtering paths
op_path = j.get('path', '')
+
+ # filter paths
if self.filter_path(op_path, '.', op_type == 'directory') == NO_MATCH:
continue
+ # check volume number
vol_no = j.get('volume', -1)
if not isinstance(vol_no, int) or vol_no < 0:
self.logger.warn('invalid volume number to be '
tarobj.close()
tarobj = None
+ # open the tarfile if needed
if not tarobj:
vol_fd.seek(offset)
tarobj = tarfile.open(mode="r" + self.mode, fileobj=vol_fd,
assert not os.path.exists('source_dir/big')
assert not os.path.exists('source_dir/small')
+ def test_collate_iterators(self):
+ '''
+ Tests the collate iterators functionality with two identical directories,
+ using an index iterator from a backup and the exact same source dir.
+ '''
+ deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD,
+ logger=self.consoleLogger)
+
+ # create first backup
+ deltatar.create_full_backup(
+ source_path="source_dir",
+ backup_path="backup_dir")
+
+ assert os.path.exists("backup_dir")
+
+ cwd = os.getcwd()
+ index_filename = deltatar.index_name_func(is_full=True)
+ index_path = os.path.join(cwd, "backup_dir", index_filename)
+ index_it = deltatar.iterate_index_path(index_path)
+
+ os.chdir('source_dir')
+ dir_it = deltatar._recursive_walk_dir('.')
+ path_it = deltatar.jsonize_path_iterator(dir_it)
+
+ try:
+ for path1, path2 in deltatar.collate_iterators(index_it, path_it):
+ assert deltatar._equal_stat_dicts(path1, path2)
+ finally:
+ os.chdir(cwd)
+
+ def test_collate_iterators_diffdirs(self):
+ '''
+ Use the collate iterators functionality with two different directories.
+ It must behave as expected.
+ '''
+ self.hash["source_dir/zzzz"] = self.create_file("source_dir/zzzz", 100)
+
+ deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD,
+ logger=self.consoleLogger)
+
+ # create first backup
+ deltatar.create_full_backup(
+ source_path="source_dir",
+ backup_path="backup_dir")
+
+ assert os.path.exists("backup_dir")
+ self.hash["source_dir/z"] = self.create_file("source_dir/z", 100)
+
+ cwd = os.getcwd()
+ index_filename = deltatar.index_name_func(is_full=True)
+ index_path = os.path.join(cwd, "backup_dir", index_filename)
+ index_it = deltatar.iterate_index_path(index_path)
+
+ os.chdir('source_dir')
+ dir_it = deltatar._recursive_walk_dir('.')
+ path_it = deltatar.jsonize_path_iterator(dir_it)
+
+ try:
+ for path1, path2 in deltatar.collate_iterators(index_it, path_it):
+ if path2['path'] == './z':
+ assert not path1
+ else:
+ assert deltatar._equal_stat_dicts(path1, path2)
+ finally:
+ os.chdir(cwd)
+
class DeltaTar2Test(DeltaTarTest):
'''