Reduce memory leaks in tarfile by making it optional to store files in self.members
author Eduardo Robles Elvira <edulix@wadobo.com>
Thu, 3 Oct 2013 14:17:17 +0000 (16:17 +0200)
committer Eduardo Robles Elvira <edulix@wadobo.com>
Thu, 3 Oct 2013 14:17:17 +0000 (16:17 +0200)
deltatar/deltatar.py
deltatar/tarfile.py

index d12938e..fcc09c5 100644 (file)
@@ -483,7 +483,6 @@ class DeltaTar(object):
                        enctype=enctype, password=self.password,
                        key_length=key_length)
 
-
     def create_full_backup(self, source_path, backup_path,
                            max_volume_size=None):
         '''
@@ -580,7 +579,8 @@ class DeltaTar(object):
                               concat_compression='#gz' in self.mode,
                               password=self.password,
                               max_volume_size=max_volume_size,
-                              new_volume_handler=new_volume_handler)
+                              new_volume_handler=new_volume_handler,
+                              save_to_members=False)
 
 
         os.chdir(source_path)
@@ -588,7 +588,6 @@ class DeltaTar(object):
         # for each file to be in the backup, do:
         for path in self._recursive_walk_dir('.'):
             # calculate stat dict for current file
-            # TODO: reduce paths length using previous dir entries
             stat = self._stat_dict(path)
             stat['path'] = u'snapshot://' + stat['path']
             stat['volume'] = self.vol_no
@@ -726,7 +725,8 @@ class DeltaTar(object):
                               concat_compression='#gz' in self.mode,
                               password=self.password,
                               max_volume_size=max_volume_size,
-                              new_volume_handler=new_volume_handler)
+                              new_volume_handler=new_volume_handler,
+                              save_to_members=False)
 
 
         # create the iterators, first the previous index iterator, then the
@@ -929,7 +929,8 @@ class DeltaTar(object):
                         format=tarfile.GNU_FORMAT,
                         concat_compression='#gz' in self.delta_tar.mode,
                         password=self.delta_tar.password,
-                        new_volume_handler=None)
+                        new_volume_handler=None,
+                        save_to_members=False)
                 return self
 
             def __exit__(self, type, value, tb):
@@ -1098,7 +1099,8 @@ class DeltaTar(object):
                             format=tarfile.GNU_FORMAT,
                             concat_compression='#gz' in self.mode,
                             password=self.password,
-                            new_volume_handler=new_volume_handler)
+                            new_volume_handler=new_volume_handler,
+                            save_to_members=False)
 
         def filter(cls, list_func, tarinfo):
             if list_func is None:
index 354d953..573e66d 100644 (file)
@@ -1771,11 +1771,15 @@ class TarFile(object):
 
     password = ''               # Used for aes encryption
 
+    save_to_members = True      # If new members are saved. This can be disabled
+                                # if you manage lots of files and don't want
+                                # to have high memory usage
+
     def __init__(self, name=None, mode="r", fileobj=None, format=None,
             tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
             errors=None, pax_headers=None, debug=None, errorlevel=None,
             max_volume_size=None, new_volume_handler=None,
-            concat_compression=False, password=''):
+            concat_compression=False, password='', save_to_members=True):
         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
            read from an existing archive, 'a' to append data to an existing
            file or 'w' to create a new file overwriting an existing one. `mode'
@@ -1844,6 +1848,7 @@ class TarFile(object):
             raise ValueError("new_volume_handler needs to be set and be callable for multivolume support")
 
         self.max_volume_size = max_volume_size
+        self.save_to_members = save_to_members
         self.new_volume_handler = new_volume_handler
         self.closed = False
         self.members = []       # list of members as TarInfo objects
@@ -2368,7 +2373,8 @@ class TarFile(object):
 
         # If there's no data to follow, finish
         if not fileobj:
-            self.members.append(tarinfo)
+            if self.save_to_members:
+                self.members.append(tarinfo)
             return
 
         # handle multivolume support
@@ -2437,7 +2443,8 @@ class TarFile(object):
                 size_left = self._size_left()
                 max_size_to_write = min(size_left, tarinfo.size - tarinfo.volume_offset)
 
-        self.members.append(tarinfo)
+        if self.save_to_members:
+            self.members.append(tarinfo)
 
     def open_volume(self, name="", fileobj=None):
         '''
@@ -2865,7 +2872,8 @@ class TarFile(object):
             break
 
         if tarinfo is not None:
-            self.members.append(tarinfo)
+            if self.save_to_members:
+                self.members.append(tarinfo)
         else:
             self._loaded = True