GNUTYPE_LONGNAME = "L" # GNU tar longname
GNUTYPE_LONGLINK = "K" # GNU tar longlink
GNUTYPE_SPARSE = "S" # GNU tar sparse file
+GNUTYPE_MULTIVOL = "M" # GNU tar continuation of a file that began on
+ # another volume
XHDTYPE = "x" # POSIX.1-2001 extended header
XGLTYPE = "g" # POSIX.1-2001 global header
SYMTYPE, DIRTYPE, FIFOTYPE,
CONTTYPE, CHRTYPE, BLKTYPE,
GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
- GNUTYPE_SPARSE)
+ GNUTYPE_SPARSE, GNUTYPE_MULTIVOL)
# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
- GNUTYPE_SPARSE)
+ GNUTYPE_SPARSE, GNUTYPE_MULTIVOL)
# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
itn(info.get("size", 0), 12, format),
itn(info.get("mtime", 0), 12, format),
" ", # checksum field
- info.get("type", REGTYPE),
+ info.get("type", REGTYPE), # TODO change to GNUTYPE_MULTIVOL when appropriate
stn(info.get("linkname", ""), 100),
stn(info.get("magic", POSIX_MAGIC), 8),
stn(info.get("uname", ""), 32),
return self.type == GNUTYPE_SPARSE
def isdev(self):
return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
+ def ismultivol(self):
+ return self.type == GNUTYPE_MULTIVOL
# class TarInfo
class TarFile(object):
ignore_zeros = False # If true, skips empty or invalid blocks and
# continues processing.
+    max_volume_size = None # If not None, establishes the maximum
+                           # size of tar volumes
+
+    new_volume_handler = None # function handler to be executed when a
+                              # new volume is needed
+
+ volume_number = 0 # current volume number, used for multi volume
+ # support
+
errorlevel = 1 # If 0, fatal errors only appear in debug
# messages (if debug >= 0). If > 0, errors
# are passed to the caller as exceptions.
def __init__(self, name=None, mode="r", fileobj=None, format=None,
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
- errors=None, pax_headers=None, debug=None, errorlevel=None):
+ errors=None, pax_headers=None, debug=None, errorlevel=None,
+ max_volume_size=None, new_volume_handler=None):
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
read from an existing archive, 'a' to append data to an existing
file or 'w' to create a new file overwriting an existing one. `mode'
self.errorlevel = errorlevel
# Init datastructures.
+ self.max_volume_size = max_volume_size
+ self.new_volume_handler = new_volume_handler
self.closed = False
self.members = [] # list of members as TarInfo objects
self._loaded = False # flag if all members have been read
self.fileobj.write(buf)
self.offset += len(buf)
- # If there's data to follow, append it.
- if fileobj is not None:
- copyfileobj(fileobj, self.fileobj, tarinfo.size)
- blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
+
+ # If there's no data to follow, finish
+ if not fileobj:
+ self.members.append(tarinfo)
+ return
+
+ is_multivol = False
+
+ # handle multivolume support
+ if self.max_volume_size:
+ size_left = self.max_volume_size - self.offset
+ max_size_to_write = min(size_left, tarinfo.size)
+ else:
+ size_left = max_size_to_write = tarinfo.size
+
+ # iterate, one iteration per volume (usually only one volume)
+ while size_left > 0:
+ if is_multivol:
+ buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
+ self.fileobj.write(buf)
+ self.offset += len(buf)
+ copyfileobj(fileobj, self.fileobj, max_size_to_write)
+ blocks, remainder = divmod(max_size_to_write, BLOCKSIZE)
if remainder > 0:
self.fileobj.write(NUL * (BLOCKSIZE - remainder))
blocks += 1
self.offset += blocks * BLOCKSIZE
+ size_left = self.max_volume_size - self.offset
+
+ # create new volume is needed
+ if size_left <= 0:
+ tarinfo.offset_data += blocks * BLOCKSIZE
+ tarinfo.type == GNUTYPE_MULTIVOL
+
+ if not self.new_volume_handler or\
+ not callable(self.new_volume_handler):
+ raise Exception("We need to create a new volume and you"
+ " didn't supply a new_volume_handler")
+
+ # the new volume handler should do everything needed to
+ # start working in a new volume. usually, the handler calls
+ # to self.open_volume
+ self.new_volume_handler(self)
+ is_multivol = True
self.members.append(tarinfo)
+    def open_volume(self, name="", fileobj=None):
+        '''
+        Called by the user (typically from inside a new_volume_handler)
+        to re-point this TarFile at a new volume file.
+
+        `name` is the path of the new volume; alternatively an already
+        open `fileobj` may be supplied. Member bookkeeping is reset and
+        the per-archive initialisation for the current mode is re-run on
+        the new volume.
+        '''
+        # open the file using either fileobj or name
+        if not fileobj:
+            if self.mode == "a" and not os.path.exists(name):
+                # Create nonexistent files in append mode.
+                self.mode = "w"
+                self._mode = "wb"
+            fileobj = bltn_open(name, self._mode)
+            self._extfileobj = False
+        else:
+            # NOTE(review): the default name is "", so the `name is None`
+            # branch only fires when a caller passes name=None explicitly
+            # -- confirm that is the intent
+            if name is None and hasattr(fileobj, "name"):
+                name = fileobj.name
+            if hasattr(fileobj, "mode"):
+                self._mode = fileobj.mode
+            self._extfileobj = True
+        self.name = os.path.abspath(name) if name else None
+        self.fileobj = fileobj
+
+        # init data structures, discarding state from the previous volume.
+        # NOTE(review): volume_number is never incremented here -- confirm
+        # whether callers expect open_volume to bump it (the demo handler
+        # derives the volume path from volume_number + 1).
+        self.closed = False
+        self.members = [] # list of members as TarInfo objects
+        self._loaded = False # flag if all members have been read
+        self.offset = self.fileobj.tell()
+        # current position in the archive file
+        self.inodes = {} # dictionary caching the inodes of
+        # archive members already added
+
+        try:
+            if self.mode == "r":
+                # prime firstmember with the first header of the new volume
+                self.firstmember = None
+                self.firstmember = self.next()
+
+            if self.mode == "a":
+                # Move to the end of the archive,
+                # before the first empty block.
+                while True:
+                    self.fileobj.seek(self.offset)
+                    try:
+                        tarinfo = self.tarinfo.fromtarfile(self)
+                        self.members.append(tarinfo)
+                    except EOFHeaderError:
+                        self.fileobj.seek(self.offset)
+                        break
+                    except HeaderError, e:
+                        raise ReadError(str(e))
+
+            if self.mode in "aw":
+                self._loaded = True
+
+                if self.pax_headers:
+                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
+                    self.fileobj.write(buf)
+                    self.offset += len(buf)
+        except:
+            # close our own file object on any failure, then re-raise
+            if not self._extfileobj:
+                self.fileobj.close()
+            self.closed = True
+            raise
def extractall(self, path=".", members=None):
"""Extract all members from the archive to the current working
directory and set owner, modification time and permissions on
--- /dev/null
+
+'''
+When reading, the file being read will not fail because tar will have
+written each volume at an appropriate size, so splitting is transparent
+to _Stream.
+
+When writing, it is tarobj that notices when the file is too big; it is
+therefore tarobj's job to close the current stream and call
+new_volume_handler before it continues writing. This is still transparent
+from the stream object's point of view.
+
+
+In the case of restarting gzip compression with #gz:
+
+For writing, it is tarobj's job to stop writing the current file and tell
+the _Stream object to handle the new-file event. So it will be _Stream's
+job to do that.
+
+For reading, it is tarobj's job to notice the end of a file when reading,
+and to call the _Stream object to handle the new-file event, in this case
+for reading.
+'''
+
+from tarfile import TarFile
+
+def new_volume_handler(tarobj):
+    '''Invoked by TarFile when the current volume fills up: opens the
+    next volume, named "<archive>.<n>".'''
+    volume_path = "%s.%d" % (tarobj.name, tarobj.volume_number + 1)
+    print "new volume: ", volume_path
+    tarobj.open_volume(volume_path)
+
+
+# write a multivolume archive, with volumes of at most 1 MiB each
+tar = TarFile.open("sample.tar.gz",
+                   mode="w|gz",
+                   max_volume_size=1024**2,
+                   new_volume_handler=new_volume_handler)
+tar.add("big")
+tar.close()
+
+## read (not implemented yet)
+#tar = tarfile.open("sample.tar.gz",
+                   #mode="r#gz",
+                   #new_volume_handler=new_volume)
+#for name in ["foo", "bar", "quux"]:
+    #tar.add(name)
+#tar.close()