delay index file write until backup is complete
author Philipp Gesang <philipp.gesang@intra2net.com>
Mon, 3 Apr 2017 14:53:46 +0000 (16:53 +0200)
committer Thomas Jarosch <thomas.jarosch@intra2net.com>
Mon, 2 Apr 2018 11:34:08 +0000 (13:34 +0200)
Due to restrictions of the file counting in the encryption
module, files must be handled in a strictly sequential manner.
Thus, postpone the creation of and all writes to the index file
until after all other files have been processed.

deltatar/deltatar.py

index 924c134..ea4799b 100644 (file)
 
 # Author: Eduardo Robles Elvira <edulix@wadobo.com>
 
-I2N_XXX_ENCRYPTION_VERSION = 1
+DELTATAR_HEADER_VERSION    = 1
+DELTATAR_PARAMETER_VERSION = 1
 
 import logging
 import datetime
 import binascii
+import io
 import operator
 import os
 import copy
@@ -587,7 +589,7 @@ class DeltaTar(object):
         # setup for encrypting payload
         if self.password is not None:
             self.crypto_ctx = crypto.Encrypt (self.password,
-                                              paramversion=I2N_XXX_ENCRYPTION_VERSION)
+                                              paramversion=DELTATAR_PARAMETER_VERSION)
 
         # some initialization
         self.vol_no = 0
@@ -596,12 +598,7 @@ class DeltaTar(object):
         vol_name = self.volume_name_func(backup_path, True, 0)
         tarfile_path = os.path.join(backup_path, vol_name)
 
-        # init index
-        index_name = self.index_name_func(True)
-        index_path = os.path.join(backup_path, index_name)
-        index_fd = self.open_auxiliary_file(index_path, 'w') # **NOT** an fd
-        if index_fd.encryption is not None:
-            self.nacl = index_fd.encryption.nacl
+        index_accu = io.BytesIO ()
 
         cwd = os.getcwd()
 
@@ -627,12 +624,12 @@ class DeltaTar(object):
         # wraps some args from context into the handler
         new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
 
-        index_fd.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
+        index_accu.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
 
         s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
         # calculate checksum and write into the stream
         crc = binascii.crc32(s) & 0xFFFFffff
-        index_fd.write(s)
+        index_accu.write(s)
 
         # start creating the tarfile
         tarobj = tarfile.TarFile.open(tarfile_path,
@@ -663,17 +660,28 @@ class DeltaTar(object):
             # store the stat dict in the index
             s = bytes(json.dumps(statd) + '\n', 'UTF-8')
             crc = binascii.crc32(s, crc) & 0xffffffff
-            index_fd.write(s)
+            index_accu.write(s)
 
         s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
         crc = binascii.crc32(s, crc) & 0xffffffff
-        index_fd.write(s)
+        index_accu.write(s)
         s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
-        index_fd.write(s)
-        index_fd.close()
+        index_accu.write(s)
         os.chdir(cwd)
         tarobj.close()
 
+        # init index
+        index_name = self.index_name_func(True)
+        index_path = os.path.join(backup_path, index_name)
+        index_sink = self.open_auxiliary_file(index_path, 'w') # **NOT** an fd
+        if index_sink.encryption is not None:
+            self.nacl = index_sink.encryption.nacl
+            dummyhdr = index_sink.encryption.next \
+                        (path, version=DELTATAR_HEADER_VERSION, nacl=self.nacl)
+            index_sink.write (dummyhdr)
+        index_sink.write (index_accu.getvalue ())
+        index_sink.close ()
+
     def create_diff_backup(self, source_path, backup_path, previous_index_path,
                            max_volume_size=None, extra_data=dict()):
         '''
@@ -751,7 +759,7 @@ class DeltaTar(object):
         # setup for encrypting payload
         if self.password is not None:
             self.crypto_ctx = crypto.Encrypt (self.password,
-                                              paramversion=I2N_XXX_ENCRYPTION_VERSION)
+                                              paramversion=DELTATAR_PARAMETER_VERSION)
         # some initialization
         self.vol_no = 0
 
@@ -760,13 +768,10 @@ class DeltaTar(object):
                                          volume_number=0)
         tarfile_path = os.path.join(backup_path, vol_name)
 
-        # init index
-        index_name = self.index_name_func(is_full=False)
-        index_path = os.path.join(backup_path, index_name)
-        index_fd = self.open_auxiliary_file(index_path, 'w') # **NOT** an fd
-        if index_fd.encryption is not None:
-            self.nacl = index_fd.encryption.nacl
+        # postpone creation of index file to accommodate encryption
+        index_accu = io.BytesIO ()
 
+        # init index
         cwd = os.getcwd()
 
         def new_volume_handler(deltarobj, cwd, backup_path, tarobj, base_name, volume_number):
@@ -788,12 +793,12 @@ class DeltaTar(object):
         # wraps some args from context into the handler
         new_volume_handler = partial(new_volume_handler, self, cwd, backup_path)
 
-        index_fd.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
+        index_accu.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))
 
         s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
         # calculate checksum and write into the stream
         crc = binascii.crc32(s) & 0xFFFFffff
-        index_fd.write(s)
+        index_accu.write(s)
 
         # start creating the tarfile
         tarobj = tarfile.TarFile.open(tarfile_path,
@@ -888,18 +893,25 @@ class DeltaTar(object):
                 # store the stat dict in the index
                 s = bytes(json.dumps(stat) + '\n', 'UTF-8')
                 crc = binascii.crc32(s, crc) & 0xffffffff
-                index_fd.write(s)
+                index_accu.write(s)
 
         s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
         crc = binascii.crc32(s, crc) & 0xffffffff
-        index_fd.write(s)
+        index_accu.write(s)
         s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
-        index_fd.write(s)
-        index_fd.close()
+        index_accu.write(s)
+
         index_it.release()
         os.chdir(cwd)
         tarobj.close()
 
+        index_name = self.index_name_func(is_full=False)
+        index_path = os.path.join(backup_path, index_name)
+        index_sink = self.open_auxiliary_file(index_path, 'w')
+        index_sink.write(index_accu.getvalue ())
+        index_sink.close()
+
+
     def iterate_index_path(self, index_path):
         '''
         Returns an index iterator. Internally, it uses a classic iterator class.