initial implementation of diff backup and a simple unit test
authorEduardo Robles Elvira <edulix@wadobo.com>
Mon, 5 Aug 2013 11:59:49 +0000 (13:59 +0200)
committerEduardo Robles Elvira <edulix@wadobo.com>
Mon, 5 Aug 2013 11:59:49 +0000 (13:59 +0200)
deltatar/deltatar.py
testing/test_deltatar.py

index 06473da..6850087 100644 (file)
@@ -441,7 +441,7 @@ class DeltaTar(object):
         # wraps some args from context into the handler
         new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
 
-        index_fd.write('{"type": "python-delta-tar-index", "version": 1 }\n')
+        index_fd.write('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full" }\n')
 
         s = '{"type": "BEGIN-FILE-LIST"}\n'
         # calculate checksum and write into the stream
@@ -581,7 +581,7 @@ class DeltaTar(object):
         # wraps some args from context into the handler
         new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
 
-        index_fd.write('{"type": "python-delta-tar-index", "version": 1 }\n')
+        index_fd.write('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff" }\n')
 
         s = '{"type": "BEGIN-FILE-LIST"}\n'
         # calculate checksum and write into the stream
@@ -597,11 +597,90 @@ class DeltaTar(object):
                               max_volume_size=max_volume_size,
                               new_volume_handler=new_volume_handler)
 
+
+        # create the iterators, first the previous index iterator, then the
+        # source path directory iterator and collate and iterate them
+        if not os.path.isabs(previous_index_path):
+            previous_index_path = os.path.join(cwd, previous_index_path)
+        index_it = self.iterate_index_path(previous_index_path)
+
         os.chdir(source_path)
+        dir_it = self._recursive_walk_dir('.')
+        dir_path_it = self.jsonize_path_iterator(dir_it)
 
         # for each file to be in the backup, do:
-        for path in self._recursive_walk_dir('.'):
-            pass
+        for ipath, dpath in self.collate_iterators(index_it, dir_path_it):
+
+            action = None
+            # if file is not in the index, it means it's a new file, so we have
+            # to take a snapshot
+            if not ipath:
+                action = 'snapshot'
+            # if the file is not in the directory iterator, it means that it has
+            # been deleted, so we need to mark it as such
+            elif not dpath:
+                action = 'delete'
+            # if the file is in both iterators, it means it might have either
+            # not changed (in which case we will just list it in our index but
+            # it will not be included in the tar file), or it might have
+            # changed, in which case we will take a new snapshot of it.
+            elif ipath and dpath:
+                if self._equal_stat_dicts(ipath, dpath):
+                    action = 'list'
+                else:
+                    action = 'snapshot'
+            # TODO: when creating chained backups (i.e. diffing from another
+            # diff), we will need to detect the type of action in the previous
+            # index, because if it was delete and dpath is None, we should
+            # discard the file
+
+            if action == 'snapshot':
+                # calculate stat dict for current file
+                stat = dpath.copy()
+                stat['path'] = u"snapshot://" + dpath['path']
+                stat['volume'] = self.vol_no
+
+                # backup file
+                tarobj.add(stat['path'], arcname=dpath['path'], recursive=False)
+
+                # retrieve file offset
+                stat['offset'] = tarobj.get_last_member_offset()
+
+                # store in the index the stat dict
+                s = json.dumps(stat) + '\n'
+                crc = binascii.crc32(s, crc) & 0xffffffff
+                index_fd.write(s)
+            elif action == 'delete':
+                stat = {
+                    u'path': u'delete://' + ipath['path'],
+                    u'type': ipath['type']
+                }
+
+                # mark it as deleted in the backup
+                tarobj.add("/dev/null", arcname='delete://' + ipath['path'])
+
+                # store in the index the stat dict
+                s = json.dumps(stat) + '\n'
+                crc = binascii.crc32(s, crc) & 0xffffffff
+                index_fd.write(s)
+            elif action == 'list':
+                stat = dpath.copy()
+                stat['path'] = u'list://' + ipath['path']
+                # unchanged files do not enter in the backup, only in the index
+
+                # store in the index the stat dict
+                s = json.dumps(stat) + '\n'
+                crc = binascii.crc32(s, crc) & 0xffffffff
+                index_fd.write(s)
+
+        s = '{"type": "END-FILE-LIST"}\n'
+        crc = binascii.crc32(s, crc) & 0xffffffff
+        index_fd.write(s)
+        index_fd.write('{"type": "file-list-checksum", "checksum": %d}\n' %\
+                        crc)
+        index_fd.close()
+        os.chdir(cwd)
+        tarobj.close()
 
     def iterate_index_path(self, index_path):
         # open
index 9cd0aba..9c5d5c6 100644 (file)
@@ -45,7 +45,7 @@ class DeltaTarTest(BaseTest):
         '''
         Create base test data
         '''
-        os.system('rm -rf source_dir source_dir2 backup_dir huge')
+        os.system('rm -rf source_dir source_dir2 backup_dir backup_dir? huge')
         os.makedirs('source_dir/test/test2')
         self.hash = dict()
         self.hash["source_dir/test/test2"] = ''
@@ -777,6 +777,31 @@ class DeltaTarTest(BaseTest):
         finally:
             os.chdir(cwd)
 
+    def test_create_empty_diff_backup(self):
+        '''
+        Creates an empty (no changes) backup diff
+        '''
+        self.hash["source_dir/zzzz"]  = self.create_file("source_dir/zzzz", 100)
+
+        deltatar = DeltaTar(mode=self.MODE, password=self.PASSWORD,
+                            logger=self.consoleLogger)
+
+        # create first backup
+        deltatar.create_full_backup(
+            source_path="source_dir",
+            backup_path="backup_dir")
+
+        prev_index_filename = deltatar.index_name_func(is_full=True)
+        prev_index_path = os.path.join("backup_dir", prev_index_filename)
+
+        deltatar.create_diff_backup("source_dir", "backup_dir2",
+                                    prev_index_path)
+
+        # check index items
+        index_path = os.path.join("backup_dir2", prev_index_filename)
+        index_it = deltatar.iterate_index_path(index_path)
+        for i in index_it:
+            assert i[0]['path'].startswith("list://")
 
 class DeltaTar2Test(DeltaTarTest):
     '''