Removing some unnecessary over-optimizations like running the gc manually or deleting...
author Eduardo Robles Elvira <edulix@wadobo.com>
Sat, 12 Oct 2013 08:18:00 +0000 (10:18 +0200)
committer Eduardo Robles Elvira <edulix@wadobo.com>
Sat, 12 Oct 2013 08:18:00 +0000 (10:18 +0200)
Optimize memory usage during restore by saving directory permissions in a dedicated class that holds only the needed data.

backup.py
deltatar/deltatar.py

index f9c1806..41deb77 100644 (file)
--- a/backup.py
+++ b/backup.py
@@ -53,7 +53,7 @@ if __name__ == "__main__":
     #tracemalloc.enable()
     #top = tracemalloc.DisplayTop(25)
     #top.show_lineno = True
-    #top.start(60)
+    #top.start(20)
     parser = argparse.ArgumentParser()
 
     parser.add_argument("-m", "--mode", default='',
index 872f9cf..01f6872 100644 (file)
@@ -596,29 +596,22 @@ class DeltaTar(object):
         os.chdir(source_path)
 
         # for each file to be in the backup, do:
-        i = 0
         for path in self._recursive_walk_dir('.'):
-            i += 1
-            if i % 2000 == 0:
-                import gc
-                gc.collect()
-
             # calculate stat dict for current file
-            stat = self._stat_dict(path)
-            stat['path'] = u'snapshot://' + stat['path']
-            stat['volume'] = self.vol_no
+            statd = self._stat_dict(path)
+            statd['path'] = u'snapshot://' + statd['path']
+            statd['volume'] = self.vol_no
 
             # backup file
-            tarobj.add(path, arcname = stat['path'], recursive=False)
+            tarobj.add(path, arcname = statd['path'], recursive=False)
 
             # retrieve file offset
-            stat['offset'] = tarobj.get_last_member_offset()
+            statd['offset'] = tarobj.get_last_member_offset()
 
             # store in the index the stat dict
-            s = json.dumps(stat) + '\n'
+            s = json.dumps(statd) + '\n'
             crc = binascii.crc32(s, crc) & 0xffffffff
             index_fd.write(s)
-            del stat
 
         s = '{"type": "END-FILE-LIST"}\n'
         crc = binascii.crc32(s, crc) & 0xffffffff
@@ -1316,6 +1309,16 @@ class RestoreHelper(object):
         self._cwd = cwd
         self._index_list = index_list
 
+        try:
+            import grp, pwd
+        except ImportError:
+            grp = pwd = None
+
+        if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
+            self.canchown = True
+        else:
+            self.canchown = False
+
         if index_list:
             for index in index_list:
                 is_full = (index == index_list[-1])
@@ -1424,10 +1427,6 @@ class RestoreHelper(object):
         cur_index = 1
         while cur_index < len(self._data):
             data = self._data[cur_index]
-            # NOTE: we restart the iterator each time instead of reusing it
-            # because the iterator can be walked over completely multiple times,
-            # for example if one path if not found in one index and we have to
-            # go to the next index.
             d, l_no, dpath = self.find_path_in_index(data, upath)
             if not d:
                 # file not found, so it's not in the index, so it must be
@@ -1516,7 +1515,7 @@ class RestoreHelper(object):
             try:
                 os.chmod(dirpath, member.mode)
                 os.utime(dirpath, (member.mtime, member.mtime))
-                if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
+                if self.canchown:
                     # We have to be root to do so.
                     try:
                         g = grp.getgrnam(member.gname)[2]
@@ -1551,7 +1550,7 @@ class RestoreHelper(object):
             volume_path = os.path.join(cwd, volume_path)
         tarobj.open_volume(volume_path)
 
-    def restore_file(self, file_data, index_data, path, l_no, unprefixed):
+    def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
         '''
         Restores a snapshot of a file from a specific backup
         '''
@@ -1601,15 +1600,16 @@ class RestoreHelper(object):
                     format=tarfile.GNU_FORMAT,
                     concat_compression='#gz' in self._deltatar.mode,
                     password=self._deltatar.password,
-                    new_volume_handler=index_data['new_volume_handler'])
+                    new_volume_handler=index_data['new_volume_handler'],
+                    save_to_members=False)
 
                 member = index_data['tarobj'].next()
 
-        member.path = unprefixed
-        member.name = unprefixed
+        member.path = unprefixed_path
+        member.name = unprefixed_path
 
         if op_type == 'directory':
-            self._directories.append(member)
+            self.add_member_dir(member)
             member = copy.copy(member)
             member.mode = 0700
 
@@ -1625,3 +1625,20 @@ class RestoreHelper(object):
 
         # finally, restore the file
         index_data['tarobj'].extract(member)
+
+    def add_member_dir(self, member):
+        '''
+        Add member dir to be restored at the end
+        '''
+        if self.canchown:
+            self._directories.append(DirItem(name=member.name, mode=member.mode,
+                mtime=member.mtime))
+        else:
+            self._directories.append(DirItem(name=member.name, mode=member.mode,
+                mtime=member.mtime, gname=member.gname, uname=member.uname,
+                uid=member.uid, gid=member.gid))
+
+class DirItem(object):
+    def __init__(self, **kwargs):
+        for k, v in kwargs.iteritems():
+            setattr(self, k, v)
\ No newline at end of file