deltatar: standardizing using a prefix for all kinds of file paths in all types of...
author Eduardo Robles Elvira <edulix@wadobo.com>
Mon, 5 Aug 2013 13:39:48 +0000 (15:39 +0200)
committer Eduardo Robles Elvira <edulix@wadobo.com>
Mon, 5 Aug 2013 13:39:48 +0000 (15:39 +0200)
deltatar/deltatar.py
testing/test_deltatar.py

index 6850087..5e93a25 100644 (file)
@@ -91,6 +91,13 @@ class DeltaTar(object):
         '#gz.aes256': '.gz.aes256'
     }
 
+    # valid path prefixes
+    __path_prefix_list = [
+        u'snapshot://',
+        u'list://',
+        u'delete://'
+    ]
+
     def __init__(self, excluded_files=[], included_files=[],
                  filter_func=None, mode="", password=None,
                  logger=None,
@@ -351,13 +358,38 @@ class DeltaTar(object):
         '''
         Return if the dicts are equal in the stat keys
         '''
-        keys = [u'gid', u'type', u'mode', u'mtime', u'path', u'size', u'inode',
+        keys = [u'gid', u'type', u'mode', u'mtime', u'size', u'inode',
                 u'ctime', u'uid']
+
+        if d1 is None and d2 is not None or d1 is not None and d2 is None:
+            return False
+
+        if self.prefixed(d1.get('path', -1)) != self.prefixed(d2.get('path', -2)):
+            return False
+
         for key in keys:
             if d1.get(key, -1) != d2.get(key, -2):
                 return False
         return True
 
+    def prefixed(self, path):
+        '''
+        return path unchanged if it starts with a known prefix, else prepend snapshot://
+        '''
+        for prefix in self.__path_prefix_list:
+            if path.startswith(prefix):
+                return path
+        return u'snapshot://' + path
+
+    def unprefixed(self, path):
+        '''
+        strip the first matching known prefix from path; return path unchanged if none matches
+        '''
+        for prefix in self.__path_prefix_list:
+            if path.startswith(prefix):
+                return path[len(prefix):]
+        return path
+
     def create_full_backup(self, source_path, backup_path,
                            max_volume_size=None):
         '''
@@ -463,11 +495,13 @@ class DeltaTar(object):
         # for each file to be in the backup, do:
         for path in self._recursive_walk_dir('.'):
             # calculate stat dict for current file
-            stat = self._stat_dict(path) # TODO: reduce paths length using previous dir entries
+            # TODO: reduce paths length using previous dir entries
+            stat = self._stat_dict(path)
+            stat['path'] = u'snapshot://' + stat['path']
             stat['volume'] = self.vol_no
 
             # backup file
-            tarobj.add(path, recursive=False)
+            tarobj.add(path, arcname = stat['path'], recursive=False)
 
             # retrieve file offset
             stat['offset'] = tarobj.get_last_member_offset()
@@ -610,7 +644,6 @@ class DeltaTar(object):
 
         # for each file to be in the backup, do:
         for ipath, dpath in self.collate_iterators(index_it, dir_path_it):
-
             action = None
             # if file is not in the index, it means it's a new file, so we have
             # to take a snapshot
@@ -641,7 +674,7 @@ class DeltaTar(object):
                 stat['volume'] = self.vol_no
 
                 # backup file
-                tarobj.add(stat['path'], arcname=dpath['path'], recursive=False)
+                tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
 
                 # retrieve file offset
                 stat['offset'] = tarobj.get_last_member_offset()
@@ -760,7 +793,7 @@ class DeltaTar(object):
                     for elem2 in it2:
                         yield (None, elem2)
                     break
-                index1 = elem1['path']
+                index1 = self.unprefixed(elem1['path'])
             if not elem2:
                 try:
                     elem2 = it2.next()
@@ -772,7 +805,7 @@ class DeltaTar(object):
                     for elem1 in it1:
                         yield (elem1, None)
                     break
-                index2 = elem2['path']
+                index2 = self.unprefixed(elem2['path'])
 
             if index1 < index2:
                 yield (elem1, None)
@@ -876,6 +909,8 @@ class DeltaTar(object):
             os.chdir(target_path)
 
             def filter(cls, tarinfo):
+                tarinfo.path = self.unprefixed(tarinfo.path)
+                tarinfo.name = self.unprefixed(tarinfo.name)
                 return cls.filter_path(tarinfo.path, '.', tarinfo.isdir()) != NO_MATCH
             filter = partial(filter, self)
 
@@ -904,13 +939,14 @@ class DeltaTar(object):
             offset = -1
             tarobj = None
 
-            # iterate through the
+            # iterate through the items to be restored
             for j, l_no in self.iterate_index_path(backup_index_path):
                 op_type = j.get('type', '')
                 op_path  = j.get('path', '')
+                upath = self.unprefixed(op_path)
 
                 # filter paths
-                if self.filter_path(op_path, '.', op_type == 'directory') == NO_MATCH:
+                if self.filter_path(upath, '.', op_type == 'directory') == NO_MATCH:
                     continue
 
                 # check volume number
@@ -952,6 +988,8 @@ class DeltaTar(object):
                                 new_volume_handler=new_volume_handler)
                     member = tarobj.next()
 
+                member.path = upath
+                member.name = upath
                 # finally, restore the file
                 tarobj.extract(member)
 
index 9c5d5c6..595d5f3 100644 (file)
@@ -208,7 +208,8 @@ class DeltaTarTest(BaseTest):
         f = open(index_path, 'r')
         for l in f.readline():
             data = json.loads(f.readline())
-            if data.get('type', '') == 'file' and data['path'] == "./huge":
+            if data.get('type', '') == 'file' and\
+                    deltatar.unprefixed(data['path']) == "./huge":
                 offset = data['offset']
                 break
 
@@ -224,7 +225,10 @@ class DeltaTarTest(BaseTest):
                               concat_compression=True,
                               new_volume_handler=new_volume_handler,
                               password=self.PASSWORD)
-        tarobj.extract(tarobj.next())
+        member = tarobj.next()
+        member.path = deltatar.unprefixed(member.path)
+        member.name = deltatar.unprefixed(member.name)
+        tarobj.extract(member)
         tarobj.close()
         assert self.hash['source_dir2/huge'] == self.md5sum('huge')
 
@@ -803,6 +807,11 @@ class DeltaTarTest(BaseTest):
         for i in index_it:
             assert i[0]['path'].startswith("list://")
 
+        tar_filename = deltatar.volume_name_func('backup_dir', True, 0)
+        tar_path = os.path.join("backup_dir", tar_filename)
+
+        # check the tar file
+
 class DeltaTar2Test(DeltaTarTest):
     '''
     Same as DeltaTar but with specific ":" mode