From 2f854e77f11ba920b98c6062478c95aca98321c3 Mon Sep 17 00:00:00 2001 From: Eduardo Robles Elvira Date: Mon, 17 Jun 2013 11:23:40 +0200 Subject: [PATCH] initial working tar multivolume creation implementation --- tarfile.py | 58 +++++++++++++++++++++++++++++-------------- tarfile_multivol_example.py | 10 +++--- 2 files changed, 44 insertions(+), 24 deletions(-) diff --git a/tarfile.py b/tarfile.py index fe12671..60c9dde 100644 --- a/tarfile.py +++ b/tarfile.py @@ -1019,6 +1019,16 @@ class TarInfo(object): """ info["magic"] = GNU_MAGIC + if self.ismultivol(): + prefix = [ + itn(info.get("atime", 0), 12, GNU_FORMAT), + itn(info.get("ctime", 0), 12, GNU_FORMAT), + itn(self.offset_data, 12, GNU_FORMAT), + itn(0, 119, GNU_FORMAT), # stuff unused in this tar implementation, set to zero + ] + info['prefix'] = "".join(prefix) + info['size'] = info['size'] - self.offset_data + buf = "" if len(info["linkname"]) > LENGTH_LINK: buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK) @@ -1113,7 +1123,7 @@ class TarInfo(object): itn(info.get("size", 0), 12, format), itn(info.get("mtime", 0), 12, format), " ", # checksum field - info.get("type", REGTYPE), # TODO change to GNUTYPE_MULTIVOL when appropriate + info.get("type", REGTYPE), stn(info.get("linkname", ""), 100), stn(info.get("magic", POSIX_MAGIC), 8), stn(info.get("uname", ""), 32), @@ -1541,7 +1551,7 @@ class TarFile(object): if hasattr(fileobj, "mode"): self._mode = fileobj.mode self._extfileobj = True - self.name = os.path.abspath(name) if name else None + self.base_name = self.name = os.path.abspath(name) if name else None self.fileobj = fileobj # Init attributes. @@ -2037,33 +2047,36 @@ class TarFile(object): self.members.append(tarinfo) return - is_multivol = False - # handle multivolume support if self.max_volume_size: - size_left = self.max_volume_size - self.offset - max_size_to_write = min(size_left, tarinfo.size) + size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset + max_size_to_write = min(size_left, tarinfo.size - tarinfo.offset_data) else: size_left = max_size_to_write = tarinfo.size + data_written = 0 + # iterate, one iteration per volume (usually only one volume) - while size_left > 0: - if is_multivol: - buf = tarinfo.tobuf(self.format, self.encoding, self.errors) - self.fileobj.write(buf) - self.offset += len(buf) + while tarinfo.offset_data < tarinfo.size: copyfileobj(fileobj, self.fileobj, max_size_to_write) blocks, remainder = divmod(max_size_to_write, BLOCKSIZE) - if remainder > 0: + + # only fill with zeros the remainder in a block if it's not + # going to be a file splitted in multiple volumes. + # if file is going to be split in multiple volumes, having a + # remainder means that there's no more space left for a block, so + # we already need to create a new volume + if max_size_to_write == tarinfo.size and remainder > 0: self.fileobj.write(NUL * (BLOCKSIZE - remainder)) blocks += 1 + self.offset += blocks * BLOCKSIZE - size_left = self.max_volume_size - self.offset + tarinfo.offset_data += blocks * BLOCKSIZE + size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset - # create new volume is needed - if size_left <= 0: - tarinfo.offset_data += blocks * BLOCKSIZE - tarinfo.type == GNUTYPE_MULTIVOL + # check if creating a new volume is needed + if size_left <= BLOCKSIZE: + tarinfo.type = GNUTYPE_MULTIVOL if not self.new_volume_handler or\ not callable(self.new_volume_handler): @@ -2073,8 +2086,15 @@ class TarFile(object): # the new volume handler should do everything needed to # start working in a new volume. usually, the handler calls # to self.open_volume - self.new_volume_handler(self) - is_multivol = True + self.volume_number += 1 + self.new_volume_handler(self, self.base_name, self.volume_number) + + # write new volume header + buf = tarinfo.tobuf(self.format, self.encoding, self.errors) + self.offset += len(buf) + self.fileobj.write(buf) + size_left = self.max_volume_size - 2*BLOCKSIZE - self.offset + max_size_to_write = min(size_left, tarinfo.size - tarinfo.offset_data) self.members.append(tarinfo) diff --git a/tarfile_multivol_example.py b/tarfile_multivol_example.py index d8e36f2..a902bca 100644 --- a/tarfile_multivol_example.py +++ b/tarfile_multivol_example.py @@ -22,16 +22,16 @@ call to _Stream object to handle the new file event, in this case for reading. from tarfile import TarFile -def new_volume_handler(tarobj): - volume_path = "%s.%d" % (tarobj.name, tarobj.volume_number + 1) +def new_volume_handler(tarobj, base_name, volume_number): + volume_path = "%s.%d" % (base_name, volume_number) print "new volume: ", volume_path tarobj.open_volume(volume_path) # write -tar = TarFile.open("sample.tar.gz", - mode="w|gz", - max_volume_size=1024**2, +tar = TarFile.open("sample.tar", + mode="w", + max_volume_size=(1024**2)*4, new_volume_handler=new_volume_handler) tar.add("big") tar.close() -- 1.7.1