From: Christian Herdtweck Date: Fri, 18 May 2018 13:57:04 +0000 (+0200) Subject: Create a streamable version of ZipFile X-Git-Tag: v1.2~1^2 X-Git-Url: http://developer.intra2net.com/git/?a=commitdiff_plain;h=c2bdf47bf40c5adcac69538b7ec9abb537b69e96;p=pyi2ncommon Create a streamable version of ZipFile Python's ZipFile requires data in memory or on disc in order to compress it. This module contains class ZipStream that extends ZipFile to allow read-only, non-seekable streams as input. For python < 3.5 this requires python-zipfile35; implementation is MUCH simpler for python >= 3.6 --- diff --git a/src/zip_stream.py b/src/zip_stream.py new file mode 100644 index 0000000..36e46ba --- /dev/null +++ b/src/zip_stream.py @@ -0,0 +1,205 @@ +""" Streamable version of zipfile + +Python's :py:class:`zipfile.ZipFile` can write to non-seekable streams only +since version 3.5 and only implements adding files as wholes. This module +implements class :py:class:`ZipStream` which is a subclass of ZipFile that can +read from non-seekable input streams and write to non-seekable output streams. + +.. 
codeauthor:: Intra2net AG +""" + +import sys +import os + +if sys.version_info.major >= 3 and sys.version_info.minor >= 6: + from zipfile import * + import shutil +elif sys.version_info.major >= 3 and sys.version_info.minor >= 5: + from stat import S_ISDIR + import time + import zlib + crc32 = zlib.crc32 + import bz2 + import struct + from zipfile import * +else: + from stat import S_ISDIR + import time + import zlib + crc32 = zlib.crc32 + import bz2 + import struct + # backport of zipfile from python 3.5; works at least for py3.3 + from zipfile35 import * +from type_helpers import isstr + +ZIP64_LIMIT = (1 << 31) - 1 + +def _get_compressor(compress_type): + """Copied from zipfile.py in py3.5 (cannot legally import)""" + if compress_type == ZIP_DEFLATED: + return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, + zlib.DEFLATED, -15) + elif compress_type == ZIP_BZIP2: + return bz2.BZ2Compressor() + elif compress_type == ZIP_LZMA: + return LZMACompressor() + else: + return None + +class ZipStream(ZipFile): + """Subclass of ZipFile that supports non-seekable input and output""" + + def create_zipinfo(self, filename, arcname=None): + """ + Create ZipInfo for given file + + Optionally set arcname as name of file inside archive. 
+ + Adapted from zipfile.py in (ZipInfo.from_file in py3.6, ZipFile.write + in py3.5) + """ + if sys.version_info.major >= 3 and sys.version_info.minor >= 6: + return ZipInfo.from_file(filename, arcname) + + st = os.stat(filename) + isdir = S_ISDIR(st.st_mode) + mtime = time.localtime(st.st_mtime) + date_time = mtime[0:6] + # Create ZipInfo instance to store file information + if arcname is None: + arcname = filename + arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) + while arcname[0] in (os.sep, os.altsep): + arcname = arcname[1:] + if isdir: + arcname += '/' + zinfo = ZipInfo(arcname, date_time) + zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes + if isdir: + zinfo.compress_type = ZIP_STORED + zinfo.file_size = 0 + zinfo.external_attr |= 0x10 # MS-DOS directory flag + else: + zinfo.compress_type = self.compression + zinfo.file_size = st.st_size + + return zinfo + + def write_stream(self, src, zinfo): + """ + Add data from byte stream src to archive with info in ZipInfo. + + Param zinfo must be a ZipInfo, created e.g. with + :py:meth:`ZipStream.create_zipinfo` + + Note: you cannot add directories this way (removed the corresponding + code). + + This is a shortened version of python's + :py:func:`zipfile.ZipFile.write`. 
+ """ + if sys.version_info.major >= 3 and sys.version_info.minor >= 6: + return self._write_stream_36(src, zinfo) + else: + return self._write_stream_35(src, zinfo) + + + def _write_stream_35(self, src, zinfo): + """Implementation of _write_stream based on ZipFile.write (py 3.5)""" + if not self.fp: + raise RuntimeError( + "Attempt to write to ZIP archive that was already closed") + + zinfo.flag_bits = 0x00 + + with self._lock: + zinfo.header_offset = self.fp.tell() # Start of header bytes + if zinfo.compress_type == ZIP_LZMA: + # Compressed data includes an end-of-stream (EOS) marker + zinfo.flag_bits |= 0x02 + + self._writecheck(zinfo) + self._didModify = True + + cmpr = _get_compressor(zinfo.compress_type) + zinfo.flag_bits |= 0x08 + + # Must overwrite CRC and sizes with correct data later + zinfo.CRC = CRC = 0 + zinfo.compress_size = compress_size = 0 + # Compressed size can be larger than uncompressed size + zip64 = self._allowZip64 and \ + zinfo.file_size * 1.05 > ZIP64_LIMIT + self.fp.write(zinfo.FileHeader(zip64)) + file_size = 0 + while 1: + buf = src.read(1024 * 8) + if not buf: + break + file_size = file_size + len(buf) + CRC = crc32(buf, CRC) + if cmpr: + buf = cmpr.compress(buf) + compress_size = compress_size + len(buf) + self.fp.write(buf) + if cmpr: + buf = cmpr.flush() + compress_size = compress_size + len(buf) + self.fp.write(buf) + zinfo.compress_size = compress_size + else: + zinfo.compress_size = file_size + zinfo.CRC = CRC + zinfo.file_size = file_size + + # Write CRC and file sizes after the file data + fmt = '