initial working tar multivolume creation implementation
author Eduardo Robles Elvira <edulix@wadobo.com>
Mon, 17 Jun 2013 09:23:40 +0000 (11:23 +0200)
committer Eduardo Robles Elvira <edulix@wadobo.com>
Mon, 17 Jun 2013 09:23:40 +0000 (11:23 +0200)
tarfile.py
tarfile_multivol_example.py

index fe12671..60c9dde 100644 (file)
@@ -1019,6 +1019,16 @@ class TarInfo(object):
         """
         info["magic"] = GNU_MAGIC
 
+        if self.ismultivol():
+            prefix = [
+                itn(info.get("atime", 0), 12, GNU_FORMAT),
+                itn(info.get("ctime", 0), 12, GNU_FORMAT),
+                itn(self.offset_data, 12, GNU_FORMAT),
+                itn(0, 119, GNU_FORMAT), # stuff unused in this tar implementation, set to zero
+            ]
+            info['prefix'] = "".join(prefix)
+            info['size'] = info['size'] - self.offset_data
+
         buf = ""
         if len(info["linkname"]) > LENGTH_LINK:
             buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
@@ -1113,7 +1123,7 @@ class TarInfo(object):
             itn(info.get("size", 0), 12, format),
             itn(info.get("mtime", 0), 12, format),
             "        ", # checksum field
-            info.get("type", REGTYPE), # TODO change to GNUTYPE_MULTIVOL when appropriate
+            info.get("type", REGTYPE),
             stn(info.get("linkname", ""), 100),
             stn(info.get("magic", POSIX_MAGIC), 8),
             stn(info.get("uname", ""), 32),
@@ -1541,7 +1551,7 @@ class TarFile(object):
             if hasattr(fileobj, "mode"):
                 self._mode = fileobj.mode
             self._extfileobj = True
-        self.name = os.path.abspath(name) if name else None
+        self.base_name = self.name = os.path.abspath(name) if name else None
         self.fileobj = fileobj
 
         # Init attributes.
@@ -2037,33 +2047,36 @@ class TarFile(object):
             self.members.append(tarinfo)
             return
 
-        is_multivol = False
-
         # handle multivolume support
         if self.max_volume_size:
-            size_left = self.max_volume_size - self.offset
-            max_size_to_write = min(size_left, tarinfo.size)
+            size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
+            max_size_to_write = min(size_left, tarinfo.size - tarinfo.offset_data)
         else:
             size_left = max_size_to_write = tarinfo.size
 
+        data_written = 0
+
         # iterate, one iteration per volume (usually only one volume)
-        while size_left > 0:
-            if is_multivol:
-                buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
-                self.fileobj.write(buf)
-                self.offset += len(buf)
+        while tarinfo.offset_data < tarinfo.size:
             copyfileobj(fileobj, self.fileobj, max_size_to_write)
             blocks, remainder = divmod(max_size_to_write, BLOCKSIZE)
-            if remainder > 0:
+
+            # Only zero-pad the remainder of a block if the file is not
+            # going to be split across multiple volumes.
+            # If the file is going to be split across multiple volumes, having
+            # a remainder means that there's no space left for another block,
+            # so we already need to create a new volume.
+            if max_size_to_write == tarinfo.size and remainder > 0:
                 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
                 blocks += 1
+
             self.offset += blocks * BLOCKSIZE
-            size_left = self.max_volume_size - self.offset
+            tarinfo.offset_data += blocks * BLOCKSIZE
+            size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
 
-            # create new volume is needed
-            if size_left <= 0:
-                tarinfo.offset_data += blocks * BLOCKSIZE
-                tarinfo.type == GNUTYPE_MULTIVOL
+            # check if creating a new volume is needed
+            if size_left <= BLOCKSIZE:
+                tarinfo.type = GNUTYPE_MULTIVOL
 
                 if not self.new_volume_handler or\
                     not callable(self.new_volume_handler):
@@ -2073,8 +2086,15 @@ class TarFile(object):
                 # the new volume handler should do everything needed to
                 # start working in a new volume. usually, the handler calls
                 # to self.open_volume
-                self.new_volume_handler(self)
-                is_multivol = True
+                self.volume_number += 1
+                self.new_volume_handler(self, self.base_name, self.volume_number)
+
+                # write new volume header
+                buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
+                self.offset += len(buf)
+                self.fileobj.write(buf)
+                size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset
+                max_size_to_write = min(size_left, tarinfo.size - tarinfo.offset_data)
 
         self.members.append(tarinfo)
 
index d8e36f2..a902bca 100644 (file)
@@ -22,16 +22,16 @@ call to _Stream object to handle the new file event, in this case for reading.
 
 from tarfile import TarFile
 
-def new_volume_handler(tarobj):
-    volume_path = "%s.%d" % (tarobj.name, tarobj.volume_number + 1)
+def new_volume_handler(tarobj, base_name, volume_number):
+    volume_path = "%s.%d" % (base_name, volume_number)
     print "new volume: ", volume_path
     tarobj.open_volume(volume_path)
 
 
 # write
-tar = TarFile.open("sample.tar.gz",
-                   mode="w|gz",
-                   max_volume_size=1024**2,
+tar = TarFile.open("sample.tar",
+                   mode="w",
+                   max_volume_size=(1024**2)*4,
                    new_volume_handler=new_volume_handler)
 tar.add("big")
 tar.close()