From: Christian Herdtweck Date: Thu, 9 Jun 2016 15:58:04 +0000 (+0200) Subject: changed TarFile.addfile to get better sized volumes if compressing X-Git-Tag: v2.2~35^2~22 X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=ae9c8de289127fc73d85fe2e14fbb02ac83de5d8;p=python-delta-tar changed TarFile.addfile to get better sized volumes if compressing --- diff --git a/deltatar/tarfile.py b/deltatar/tarfile.py index 230b47e..e0eacde 100644 --- a/deltatar/tarfile.py +++ b/deltatar/tarfile.py @@ -2393,6 +2393,13 @@ class TarFile(object): self.fileobj.write(buf) self.offset += len(buf) + if self.max_volume_size: + if isinstance(self.fileobj, _Stream): + _size_left = self._size_left_stream + else: + _size_left = self._size_left_file + else: + _size_left = lambda: tarinfo.size # If there's no data to follow, finish if not fileobj: @@ -2400,71 +2407,73 @@ class TarFile(object): self.members.append(tarinfo) return - # handle multivolume support - if self.max_volume_size: - size_left = self._size_left() - # we only split volumes in the middle of a file, that means we have - # to write at least one block - if size_left < BLOCKSIZE: - size_left = BLOCKSIZE - max_size_to_write = min(size_left, tarinfo.size - tarinfo.volume_offset) - else: - size_left = max_size_to_write = tarinfo.size - - # iterate, one iteration per volume (usually only one volume) - while tarinfo.volume_offset < tarinfo.size: - copyfileobj(fileobj, self.fileobj, max_size_to_write) - blocks, remainder = divmod(max_size_to_write, BLOCKSIZE) - - # only fill with zeros the remainder in a block if it's not - # going to be a file splitted in multiple volumes. - # if file is going to be split in multiple volumes, having a - # remainder means that there's no more space left for a block, so - # we already need to create a new volume. 
- if remainder > 0: - self.fileobj.write(NUL * (BLOCKSIZE - remainder)) - blocks += 1 + target_size_left = _size_left() + source_size_left = tarinfo.size + assert tarinfo.volume_offset == 0 + + # we only split volumes in the middle of a file, that means we have + # to write at least one block + if target_size_left < BLOCKSIZE: + target_size_left = BLOCKSIZE - # we already assured previously that if we are doing multivolume, - # there's not going to be a remainder - if self.max_volume_size and max_size_to_write == size_left: - assert remainder == 0 + # loop over multiple volumes + while source_size_left > 0: + # Write as much data as possible from source into target. + # When compressing data, we cannot easily predict how much data we + # can write until target_size_left == 0 --> need to iterate + size_can_write = min(target_size_left, source_size_left) - self.offset += blocks * BLOCKSIZE - size_left -= blocks * BLOCKSIZE - tarinfo.volume_offset += blocks * BLOCKSIZE + while size_can_write > 0: + copyfileobj(fileobj, self.fileobj, size_can_write) + self.offset += size_can_write + source_size_left -= size_can_write + target_size_left = _size_left() + size_can_write = min(target_size_left, source_size_left) - # check if creating a new volume is needed - if tarinfo.volume_offset < tarinfo.size and\ - self.max_volume_size and size_left < 3*BLOCKSIZE: + # now target_size_left == 0 or source_size_left == 0 + + # if there is data left to write, we need to create a new volume + if source_size_left > 0: tarinfo.type = GNUTYPE_MULTIVOL if not self.new_volume_handler or\ not callable(self.new_volume_handler): raise Exception("We need to create a new volume and you " - "didn't supply a new_volume_handler") + "didn't supply a new_volume_handler") # the new volume handler should do everything needed to # start working in a new volume. 
usually, the handler calls # to self.open_volume self.volume_number += 1 - # set to be used by open_volume, becuase in the case of a PAX + # set to be used by open_volume, because in the case of a PAX # tar it needs to write information about the volume and offset # in the global header + tarinfo.volume_offset = tarinfo.size - source_size_left self.volume_tarinfo = tarinfo + self.new_volume_handler(self, self.base_name, self.volume_number) self.volume_tarinfo = None # write new volume header buf = tarinfo.tobuf(self.format, self.encoding, self.errors) - self.offset += len(buf) self.fileobj.write(buf) - size_left = self._size_left() - max_size_to_write = min(size_left, tarinfo.size - tarinfo.volume_offset) + self.offset += len(buf) + + # adjust variables; open_volume should have reset self.offset + # --> _size_left should be big again + target_size_left = _size_left() + size_can_write = min(target_size_left, source_size_left) + + # now, all data has been written. We may have to fill up the rest of + # the block in target with 0s + remainder = (tarinfo.size - tarinfo.volume_offset) % BLOCKSIZE + if remainder > 0: + self.fileobj.write(NUL * (BLOCKSIZE - remainder)) + self.offset += BLOCKSIZE - remainder if self.save_to_members: self.members.append(tarinfo)