sync tarfile stream diligently when writing new objects
author: Philipp Gesang <philipp.gesang@intra2net.com>
Fri, 11 Aug 2017 08:25:12 +0000 (10:25 +0200)
committer: Thomas Jarosch <thomas.jarosch@intra2net.com>
Mon, 2 Apr 2018 11:34:09 +0000 (13:34 +0200)
Turns out all the offsets written to the index when neither
encrypting nor compressing were, well, … off. In fact they would
only be updated at tar block boundaries due to buffering. Since
“last_block_offset” record keeping blatantly violates layering
boundaries, it would only work reliably with the concat
compression and encryption modes that do the same.

Sync when adding a new object so we get the accurate offset
value. Voilà, recovery now works with uncompressed and
unencrypted archives as well.

deltatar/deltatar.py
deltatar/tarfile.py
runtests.py
testing/test_recover.py

index b247fdb..1b15a0a 100644 (file)
@@ -1031,7 +1031,7 @@ class DeltaTar(object):
                 self.f = None
 
             def __next__(self):
-                # read each file in the index and process it to do the retore
+                # read each file in the index and process it to do the restore
                 j = {}
                 l_no = -1
                 try:
index d67b4fd..49678cc 100644 (file)
@@ -592,13 +592,15 @@ class _Stream:
         if self.arcmode & ARCMODE_COMPRESS:
             if getattr (self, "cmp", None) is not None:
                 self._finalize_write_gz ()
+        self.__sync()
+        if self.arcmode & ~(ARCMODE_ENCRYPT | ARCMODE_COMPRESS):
+            self.last_block_offset = self.fileobj.tell()
         if self.arcmode & ARCMODE_ENCRYPT:
-            self.__sync ()
             self._finalize_write_encrypt ()
             self._init_write_encrypt (name, set_last_block_offset=True)
         if self.arcmode & ARCMODE_COMPRESS:
             self._init_write_gz (set_last_block_offset =
-                                   not (self.arcmode & ARCMODE_ENCRYPT))
+                                 not (self.arcmode & ARCMODE_ENCRYPT))
         return self.last_block_offset
 
 
index d1ed888..2ed85cb 100755 (executable)
@@ -26,6 +26,7 @@ from testing.test_recover import \
       RecoverCorruptPayloadTest \
     , RecoverCorruptPayloadGZTest \
     , RecoverCorruptPayloadGZAESTest \
+    , RecoverCorruptHeaderTest \
     , RecoverCorruptHeaderGZTest \
     , RecoverCorruptHeaderGZAESTest
 from testing.test_rescue_tar import RescueTarTest
@@ -65,6 +66,7 @@ if __name__ == "__main__":
                          , RecoverCorruptPayloadTest
                          , RecoverCorruptPayloadGZTest
                          , RecoverCorruptPayloadGZAESTest
+                         , RecoverCorruptHeaderTest
                          , RecoverCorruptHeaderGZTest
                          , RecoverCorruptHeaderGZAESTest
                          ]:
index 5ecfb1f..fd85707 100644 (file)
@@ -224,7 +224,7 @@ class RecoverTest (BaseTest):
                                          "%s/%s" % (bak_path, index_file)
                                      ])
 
-        print ("¤¤¤ failed", failed)
+        print("¤¤¤ failed", failed)
         assert len (failed) == self.FAILURES
 
         # with one file missing
@@ -258,6 +258,13 @@ class RecoverCorruptPayloadGZAESTest (RecoverTest):
     FAILURES    = 1
 
 
+class RecoverCorruptHeaderTest (RecoverTest):
+    COMPRESSION = None
+    PASSWORD    = None
+    FAILURES    = 1
+    CORRUPT     = corrupt_header
+
+
 class RecoverCorruptHeaderGZTest (RecoverTest):
     COMPRESSION = "#gz"
     PASSWORD    = None