# Copyright (C) 2013 Intra2net AG
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see
# <http://www.gnu.org/licenses/lgpl-3.0.html>

import os, unittest, hashlib, string

from deltatar.tarfile import TarFile, GNU_FORMAT, GZ_MAGIC_BYTES
# filesplit is used below to cut the archive at gzip magic boundaries; a
# top-level filesplit.py module is assumed here for the import
import filesplit

from . import BaseTest, new_volume_handler


class ConcatCompressTest(BaseTest):
    """
    Test concatenated compression in tarfiles
    """

    def test_zcat_extract_concat(self):
        """
        Create a tar file with only one file inside, using concat compression
        mode. Then decompress it with zcat and untar it with gnu tar.
        """

        # create the content of the file to compress and hash it
        hash = self.create_file("big", 50000)

        # create the tar file with volumes
        tarobj = TarFile.open("sample.tar.gz",
                              mode="w#gz")
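        tarobj.add("big")
        tarobj.close()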

        # extract with normal tar and check output
        os.system("zcat sample.tar.gz > sample.tar")
        os.system("tar xf sample.tar")
        assert os.path.exists("big")
        assert hash == self.md5sum("big")

    def test_concat_extract(self):
        """
        Create a tar file with only one file inside, using concat compression
        mode, then decompress it with the tarfile module too.
        """

        # create the content of the file to compress and hash it
        hash = self.create_file("big", 50000)

        # create the tar file with volumes
        tarobj = TarFile.open("sample.tar.gz",
                              mode="w#gz")
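        tarobj.add("big")
        tarobj.close()

        # remove the original so the assertion below checks the extracted copy
        os.unlink("big")
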
        tarobj = TarFile.open("sample.tar.gz",
                              mode="r#gz")
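        tarobj.extractall()
        tarobj.close()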
        assert os.path.exists("big")
        assert hash == self.md5sum("big")

    def test_concat_extract_fileobj(self):
        """
        Create a tar file with only one file inside, using concat compression
        mode, then decompress it with the tarfile module using the fileobj
        parameter.
        """

        # create the content of the file to compress and hash it
        hash = self.create_file("big", 50000)

        # create the tar file with volumes
        tarobj = TarFile.open("sample.tar.gz",
                              mode="w#gz")
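        tarobj.add("big")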
        pos = tarobj.get_last_member_offset()
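        tarobj.close()

        # remove the original so the assertions below check the extracted copy
        os.unlink("big")
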
        fo = open("sample.tar.gz", 'rb') # will not be released on tarfile.close()
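        # seek to the recorded member offset (presumably why pos was captured above)
        fo.seek(pos)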
        tarobj = TarFile.open(mode="r#gz", fileobj=fo)
        tarobj.extract(tarobj.next())
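        tarobj.close()
        fo.close()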

        assert os.path.exists("big")
        assert hash == self.md5sum("big")

    def test_concat_extract_one_fileobj(self):
        """
        Create a tar file with multiple files inside, using concat compression
        mode, then decompress it with the tarfile module using the fileobj
        parameter.
        """

        # create the content of the files to compress and hash them
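        hash = {}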
        hash["big"] = self.create_file("big", 50000)
        hash["small"] = self.create_file("small", 100)
        hash["small2"] = self.create_file("small2", 354)

        # create the tar file with volumes
        tarobj = TarFile.open("sample.tar.gz",
                              mode="w#gz")
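        tarobj.add("big")
        # "small" is added right before recording the offset, so pos below points
        # at the "small" member (ordering assumed from the assertions further down)
        tarobj.add("small")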
        pos = tarobj.get_last_member_offset()
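        tarobj.add("small2")
        tarobj.close()
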
        assert os.path.exists("sample.tar.gz")
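
        # remove the originals so the assertions below only see extracted files
        os.unlink("big")
        os.unlink("small")
        os.unlink("small2")
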
        # extract only the "small" file
        fo = open("sample.tar.gz", 'rb') # will not be released on tarfile.close()
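        # seek to the offset of the "small" member recorded above
        fo.seek(pos)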
        tarobj = TarFile.open(mode="r#gz", fileobj=fo)
        tarobj.extract(tarobj.next())
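        tarobj.close()
        fo.close()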

        assert os.path.exists("small")
        assert hash['small'] == self.md5sum("small")

        # we didn't extract the other files
        assert not os.path.exists("big")
        assert not os.path.exists("small2")

    def test_concat_extract_one_fileobj_multivol(self):
        """
        Create a tar file with multiple files inside and multiple volumes,
        using concat compression mode, then decompress a file spanning two
        volumes with the tarfile module using the fileobj parameter.
        """

        # create the content of the files to compress and hash them
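        hash = {}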
        hash["small"] = self.create_file("small", 100000)
        hash["big"] = self.create_file("big", 1200000)

        # create the tar file with volumes
        tarobj = TarFile.open("sample.tar.gz",
                              mode="w#gz",
                              max_volume_size=1000000,
                              new_volume_handler=new_volume_handler)
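        tarobj.add("small")
        # "big" exceeds max_volume_size, so it spans into the second volume; it is
        # added last so that pos below points at it (ordering assumed)
        tarobj.add("big")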
        pos = tarobj.get_last_member_offset()
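        tarobj.close()
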
        assert os.path.exists("sample.tar.gz")
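
        # remove the originals so the assertions below only see extracted files
        os.unlink("big")
        os.unlink("small")
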
        def new_volume_handler_fo(tarobj, base_name, volume_number):
            """
            Handles the new volumes, ignoring base_name as it'll be None
            because we'll be using a seekable fileobj.
            """
            volume_path = "sample.tar.gz.%d" % volume_number
            tarobj.open_volume(volume_path)

        # extract only the "big" file
        fo = open("sample.tar.gz", 'rb') # will not be released on tarfile.close()
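        # seek to the offset of the "big" member recorded above
        fo.seek(pos)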
        tarobj = TarFile.open(mode="r#gz", fileobj=fo,
                              new_volume_handler=new_volume_handler_fo)
        tarobj.extract(tarobj.next())
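        tarobj.close()
        fo.close()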

        assert os.path.exists("big")
        assert hash['big'] == self.md5sum("big")

        # we didn't extract the other files
        assert not os.path.exists("small")

    def test_multiple_files_zcat_extract(self):
        """
        Create a tar file with multiple files inside, using concat
        compression mode, then decompress the tarfile.
        """
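
        # create the content of the files to compress and hash them
        hash = {}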
        hash["big"] = self.create_file("big", 50000)
        hash["small"] = self.create_file("small", 100)
        hash["small2"] = self.create_file("small2", 354)

        # create the tar file with volumes
        tarobj = TarFile.open("sample.tar.gz",
                              mode="w#gz")
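        tarobj.add("big")
        tarobj.add("small")
        tarobj.add("small2")
        tarobj.close()
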
        assert os.path.exists("sample.tar.gz")
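
        # remove the originals so the checks below exercise the extracted copies
        os.unlink("big")
        os.unlink("small")
        os.unlink("small2")
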
        # extract and check output
        os.system("zcat sample.tar.gz > sample.tar")
        tarobj = TarFile.open("sample.tar",
                              mode="r")
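        tarobj.extractall()
        tarobj.close()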

        for key, value in hash.items():
            assert os.path.exists(key)
            assert value == self.md5sum(key)

    def test_multiple_files_concat_extract(self):
        """
        Create a tar file with multiple files inside, using concat
        compression mode, then decompress the tarfile.
        """
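
        # create the content of the files to compress and hash them
        hash = {}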
        hash["big"] = self.create_file("big", 50000)
        hash["small"] = self.create_file("small", 100)
        hash["small2"] = self.create_file("small2", 354)

        # create the tar file with volumes
        tarobj = TarFile.open("sample.tar.gz", mode="w#gz")
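        tarobj.add("big")
        tarobj.add("small")
        tarobj.add("small2")
        tarobj.close()
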
        assert os.path.exists("sample.tar.gz")
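
        # remove the originals so the checks below exercise the extracted copies
        os.unlink("big")
        os.unlink("small")
        os.unlink("small2")
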
        # extract and check output
        tarobj = TarFile.open("sample.tar.gz",
                              mode="r#gz")
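        tarobj.extractall()
        tarobj.close()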

        for key, value in hash.items():
            assert os.path.exists(key)
            assert value == self.md5sum(key)

    def test_multivol_gzip_concat_extract(self):
        """
        Test multivol tarball with concat compression.
        """
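
        # create the content of the files to compress and hash them
        hash = {}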
        hash["big"] = self.create_file("big", 50000)
        hash["big2"] = self.create_file("big2", 10200)
        hash["small"] = self.create_file("small", 100)
        hash["small2"] = self.create_file("small2", 354)

        # create the tar file with volumes
        tarobj = TarFile.open("sample.tar.gz",
                              mode="w#gz",
                              max_volume_size=20000,
                              new_volume_handler=new_volume_handler)
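        # add order assumed; "big" exceeds the 20000-byte volume size, so the
        # archive necessarily spans several volumes
        tarobj.add("big")
        tarobj.add("big2")
        tarobj.add("small")
        tarobj.add("small2")
        tarobj.close()
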
        assert os.path.exists("sample.tar.gz")
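
        # remove the originals so the checks below exercise the extracted copies
        os.unlink("big")
        os.unlink("big2")
        os.unlink("small")
        os.unlink("small2")
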
        tarobj = TarFile.open("sample.tar.gz",
                              mode="r#gz",
                              new_volume_handler=new_volume_handler)
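        tarobj.extractall()
        tarobj.close()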

        for key, value in hash.items():
            assert os.path.exists(key)
            assert value == self.md5sum(key)

    def test_multiple_files_rescue_extract(self):
        """
        Use the filesplit utility to split the file into compressed tar blocks
        that can be individually decompressed and untarred, thanks to the
        concat gzip tar format.
        """
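
        hash = {}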
        hash["big"] = self.create_file("big", 50000)
        hash["small"] = self.create_file("small", 100)
        hash["small2"] = self.create_file("small2", 354)

        # create the tar file with volumes
        tarobj = TarFile.open("sample.tar.gz", mode="w#gz")
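        tarobj.add("big")
        tarobj.add("small")
        tarobj.add("small2")
        tarobj.close()
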
        assert os.path.exists("sample.tar.gz")
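
        # remove the originals so the checks below exercise the rescued copies
        os.unlink("big")
        os.unlink("small")
        os.unlink("small2")

        # equivalent to $ python filesplit.py -s $'\x1f\x8b' -p sample.tar.gz. sample.tar.gz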
        filesplit.split_file(GZ_MAGIC_BYTES, "sample.tar.gz.", "sample.tar.gz")

        assert os.path.exists("sample.tar.gz.0") # first file
        assert os.path.exists("sample.tar.gz.1") # second file
        assert os.path.exists("sample.tar.gz.2") # third file
        assert not os.path.exists("sample.tar.gz.3") # nothing else

        # extract and check output
        for i in range(0, 3):
            tarobj = TarFile.open("sample.tar.gz.%d" % i,
                                  mode="r#gz")
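            tarobj.extractall()
            tarobj.close()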

        for key, value in hash.items():
            assert os.path.exists(key)
            assert value == self.md5sum(key)

    def test_multiple_files_rescue_extract_gnu(self):
        """
        Use the filesplit utility to split the file into compressed tar blocks
        that can be individually decompressed and untarred, thanks to the
        concat gzip tar format. We do the extraction with the standard gnu tar
        and gzip command line tools.
        """
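
        hash = {}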
        hash["big"] = self.create_file("big", 50000)
        hash["small"] = self.create_file("small", 100)
        hash["small2"] = self.create_file("small2", 354)

        # create the tar file with volumes
        tarobj = TarFile.open("sample.tar.gz", mode="w#gz")
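        tarobj.add("big")
        tarobj.add("small")
        tarobj.add("small2")
        tarobj.close()
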
        assert os.path.exists("sample.tar.gz")
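
        # remove the originals so the checks below exercise the rescued copies
        os.unlink("big")
        os.unlink("small")
        os.unlink("small2")
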
        # split the archive using the command line tool this time
        os.system("python3 filesplit.py -s $'\\x1f\\x8b' -p sample.tar.gz. sample.tar.gz")

        assert os.path.exists("sample.tar.gz.0") # first file
        assert os.path.exists("sample.tar.gz.1") # second file
        assert os.path.exists("sample.tar.gz.2") # third file
        assert not os.path.exists("sample.tar.gz.3") # nothing else

        # extract and check output
        for i in range(0, 3):
            os.system("gzip -cd sample.tar.gz.%d > sample.%d.tar" % (i, i))
            os.system("tar xf sample.%d.tar" % i)

        for key, value in hash.items():
            assert os.path.exists(key)
            assert value == self.md5sum(key)

    def test_multiple_files_rescue_extract_broken(self):
        """
        Use the filesplit utility to split the file into compressed tar blocks
        that can be individually decompressed and untarred, thanks to the
        concat gzip tar format. In this case, we simulate that one of the
        files is corrupted. The rest will decompress just fine.
        """
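
        hash = {}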
        hash["big"] = self.create_file("big", 50000)
        hash["small"] = self.create_file("small", 100)
        hash["small2"] = self.create_file("small2", 354)

        # create the tar file with volumes
        tarobj = TarFile.open("sample.tar.gz", mode="w#gz")
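        # "big" is added first so that it is member 0, matching the i == 0 check below
        tarobj.add("big")
        tarobj.add("small")
        tarobj.add("small2")
        tarobj.close()
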
        assert os.path.exists("sample.tar.gz")

        # overwrite stuff in the middle of the big file
        f = open('sample.tar.gz', 'r+b')
        f.seek(100)  # offset assumed; it just needs to land inside the first member
        f.write(bytes("breaking things", 'UTF-8'))
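        f.close()

        # remove the originals so the final checks only see rescued files
        os.unlink("big")
        os.unlink("small")
        os.unlink("small2")
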
        # equivalent to $ python filesplit.py -s $'\x1f\x8b' -p sample.tar.gz. sample.tar.gz
        filesplit.split_file(GZ_MAGIC_BYTES, "sample.tar.gz.", "sample.tar.gz")

        assert os.path.exists("sample.tar.gz.0") # first file
        assert os.path.exists("sample.tar.gz.1") # second file
        assert os.path.exists("sample.tar.gz.2") # third file
        assert not os.path.exists("sample.tar.gz.3") # nothing else

        # extract and check output
        for i in range(0, 3):
            try:
                tarobj = TarFile.open("sample.tar.gz.%d" % i,
                                      mode="r#gz")
                tarobj.extractall()
                tarobj.close()
            except Exception as e:
                if i == 0: # big file doesn't extract well because it's corrupted
                    continue
                raise Exception("Error extracting a tar.gz not related to the broken 'big' file")

        for key, value in hash.items():
            if key != "big": # the corrupted "big" file was not rescued
                assert os.path.exists(key)
                assert value == self.md5sum(key)