From aae127d02e333010e37e7dc49e67630fff29d700 Mon Sep 17 00:00:00 2001 From: Eduardo Robles Elvira Date: Mon, 5 Aug 2013 13:59:49 +0200 Subject: [PATCH] initial implementation of diff backup and a simple unit test --- deltatar/deltatar.py | 87 +++++++++++++++++++++++++++++++++++++++++++-- testing/test_deltatar.py | 27 ++++++++++++++- 2 files changed, 109 insertions(+), 5 deletions(-) diff --git a/deltatar/deltatar.py b/deltatar/deltatar.py index 06473da..6850087 100644 --- a/deltatar/deltatar.py +++ b/deltatar/deltatar.py @@ -441,7 +441,7 @@ class DeltaTar(object): # wraps some args from context into the handler new_volume_handler = partial(new_volume_handler, self, cwd, backup_path) - index_fd.write('{"type": "python-delta-tar-index", "version": 1 }\n') + index_fd.write('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full" }\n') s = '{"type": "BEGIN-FILE-LIST"}\n' # calculate checksum and write into the stream @@ -581,7 +581,7 @@ class DeltaTar(object): # wraps some args from context into the handler new_volume_handler = partial(new_volume_handler, self, cwd, backup_path) - index_fd.write('{"type": "python-delta-tar-index", "version": 1 }\n') + index_fd.write('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff" }\n') s = '{"type": "BEGIN-FILE-LIST"}\n' # calculate checksum and write into the stream @@ -597,11 +597,90 @@ class DeltaTar(object): max_volume_size=max_volume_size, new_volume_handler=new_volume_handler) + + # create the iterators, first the previous index iterator, then the + # source path directory iterator and collate and iterate them + if not os.path.isabs(previous_index_path): + previous_index_path = os.path.join(cwd, previous_index_path) + index_it = self.iterate_index_path(previous_index_path) + os.chdir(source_path) + dir_it = self._recursive_walk_dir('.') + dir_path_it = self.jsonize_path_iterator(dir_it) # for each file to be in the backup, do: - for path in self._recursive_walk_dir('.'): - pass + for ipath, 
dpath in self.collate_iterators(index_it, dir_path_it): + + action = None + # if file is not in the index, it means it's a new file, so we have + # to take a snapshot + if not ipath: + action = 'snapshot' + # if the file is not in the directory iterator, it means that it has + # been deleted, so we need to mark it as such + elif not dpath: + action = 'delete' + # if the file is in both iterators, it means it might have either + # not changed (in which case we will just list it in our index but + # it will not be included in the tar file), or it might have + # changed, in which case we will take a snapshot of it. + elif ipath and dpath: + if self._equal_stat_dicts(ipath, dpath): + action = 'list' + else: + action = 'snapshot' + # TODO: when creating chained backups (i.e. diffing from another + # diff), we will need to detect the type of action in the previous + # index, because if it was delete and dpath is None, we should + # discard the file + + if action == 'snapshot': + # calculate stat dict for current file + stat = dpath.copy() + stat['path'] = u"snapshot://" + dpath['path'] + stat['volume'] = self.vol_no + + # backup file + tarobj.add(stat['path'], arcname=dpath['path'], recursive=False) + + # retrieve file offset + stat['offset'] = tarobj.get_last_member_offset() + + # store in the index the stat dict + s = json.dumps(stat) + '\n' + crc = binascii.crc32(s, crc) & 0xffffffff + index_fd.write(s) + elif action == 'delete': + stat = { + u'path': u'delete://' + ipath['path'], + u'type': ipath['type'] + } + + # mark it as deleted in the backup + tarobj.add("/dev/null", arcname='delete://' + ipath['path']) + + # store in the index the stat dict + s = json.dumps(stat) + '\n' + crc = binascii.crc32(s, crc) & 0xffffffff + index_fd.write(s) + elif action == 'list': + stat = dpath.copy() + stat['path'] = u'list://' + ipath['path'] + # unchanged files do not enter in the backup, only in the index + + # store in the index the stat dict + s = json.dumps(stat) + '\n' + crc =
binascii.crc32(s, crc) & 0xffffffff + index_fd.write(s) + + s = '{"type": "END-FILE-LIST"}\n' + crc = binascii.crc32(s, crc) & 0xffffffff + index_fd.write(s) + index_fd.write('{"type": "file-list-checksum", "checksum": %d}\n' %\ + crc) + index_fd.close() + os.chdir(cwd) + tarobj.close() def iterate_index_path(self, index_path): # open diff --git a/testing/test_deltatar.py b/testing/test_deltatar.py index 9cd0aba..9c5d5c6 100644 --- a/testing/test_deltatar.py +++ b/testing/test_deltatar.py @@ -45,7 +45,7 @@ class DeltaTarTest(BaseTest): ''' Create base test data ''' - os.system('rm -rf source_dir source_dir2 backup_dir huge') + os.system('rm -rf source_dir source_dir2 backup_dir backup_dir? huge') os.makedirs('source_dir/test/test2') self.hash = dict() self.hash["source_dir/test/test2"] = '' @@ -777,6 +777,31 @@ class DeltaTarTest(BaseTest): finally: os.chdir(cwd) + def test_create_empty_diff_backup(self): + ''' + Creates an empty (no changes) backup diff + ''' + self.hash["source_dir/zzzz"] = self.create_file("source_dir/zzzz", 100) + + deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD, + logger=self.consoleLogger) + + # create first backup + deltatar.create_full_backup( + source_path="source_dir", + backup_path="backup_dir") + + prev_index_filename = deltatar.index_name_func(is_full=True) + prev_index_path = os.path.join("backup_dir", prev_index_filename) + + deltatar.create_diff_backup("source_dir", "backup_dir2", + prev_index_path) + + # check index items + index_path = os.path.join("backup_dir2", prev_index_filename) + index_it = deltatar.iterate_index_path(index_path) + for i in index_it: + assert i[0]['path'].startswith("list://") class DeltaTar2Test(DeltaTarTest): ''' -- 1.7.1