Commit | Line | Data |
---|---|---|
866c42e6 DGM |
1 | # Copyright (C) 2013 Intra2net AG |
2 | # | |
494b38aa DGM |
3 | # This program is free software; you can redistribute it and/or modify |
4 | # it under the terms of the GNU Lesser General Public License as published | |
5 | # by the Free Software Foundation; either version 3 of the License, or | |
866c42e6 DGM |
6 | # (at your option) any later version. |
7 | # | |
8 | # This program is distributed in the hope that it will be useful, | |
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
494b38aa | 11 | # GNU Lesser General Public License for more details. |
866c42e6 DGM |
12 | # |
13 | # You should have received a copy of the GNU Lesser General Public License | |
494b38aa DGM |
14 | # along with this program. If not, see |
15 | # <http://www.gnu.org/licenses/lgpl-3.0.html> | |
866c42e6 DGM |
16 | |
17 | ||
5fdff89f ERE |
18 | import os, unittest, hashlib, string |
19 | ||
d601d33b | 20 | from deltatar.tarfile import TarFile, GNU_FORMAT, GZ_MAGIC_BYTES |
5fdff89f | 21 | |
3759f796 | 22 | import filesplit |
26fa5ad5 | 23 | from . import BaseTest, new_volume_handler |
5fdff89f | 24 | |
class ConcatCompressTest(BaseTest):
    """
    Test concatenated compression in tarfiles.

    All tests use fixed relative file names ("big", "small", "sample.tar.gz",
    ...), so they read and write the current working directory.
    """

    # name of the archive every test writes
    _ARCHIVE = "sample.tar.gz"

    # (name, size) pairs handed to create_file() by the three-file tests
    _THREE_FILES = (("big", 50000), ("small", 100), ("small2", 354))

    # ------------------------------------------------------------------
    # private helpers (names start with "_" so they are not collected as tests)
    # ------------------------------------------------------------------

    def _create_files(self, specs):
        """
        Create one file per (name, size) pair, in the given order, through
        create_file() and return a dict mapping each name to the value
        create_file() returned for it. That value is later compared with
        md5sum() of the extracted file.
        """
        return {name: self.create_file(name, size) for name, size in specs}

    def _remove_files(self, names):
        """
        Delete the given files so that a later extraction has to recreate them.
        """
        for name in names:
            os.unlink(name)

    def _write_archive(self, names, offset_after=None, **open_kwargs):
        """
        Add the given files, in order, to sample.tar.gz opened in concat gzip
        mode ("w#gz"). Extra keyword arguments are passed to TarFile.open().

        If offset_after names one of the files, get_last_member_offset() is
        queried right after that file was added and its result is returned;
        otherwise None is returned.
        """
        tarobj = TarFile.open(self._ARCHIVE, mode="w#gz", **open_kwargs)
        offset = None
        for name in names:
            tarobj.add(name)
            if name == offset_after:
                offset = tarobj.get_last_member_offset()
        tarobj.close()

        assert os.path.exists(self._ARCHIVE)
        return offset

    def _extract_all(self, path, mode, **open_kwargs):
        """
        Open the archive at path with the given mode and extract everything.
        The TarFile is closed even when the extraction raises.
        """
        tarobj = TarFile.open(path, mode=mode, **open_kwargs)
        try:
            tarobj.extractall()
        finally:
            tarobj.close()

    def _extract_member_at(self, offset, **open_kwargs):
        """
        Seek sample.tar.gz to offset and extract the first member found
        there, reading through the fileobj parameter in "r#gz" mode.
        """
        # the file object will not be released on tarfile.close(), so it is
        # managed (and closed) here
        with open(self._ARCHIVE, 'rb') as fo:
            fo.seek(offset)
            tarobj = TarFile.open(mode="r#gz", fileobj=fo, **open_kwargs)
            try:
                tarobj.extract(tarobj.next())
            finally:
                tarobj.close()

    def _assert_restored(self, digests, names=None):
        """
        Check that every file in names (default: every key of digests)
        exists and that md5sum() of it equals the recorded value.
        """
        for name in (digests if names is None else names):
            assert os.path.exists(name), "%s was not extracted" % name
            assert digests[name] == self.md5sum(name), \
                "%s does not match its original checksum" % name

    def _assert_three_split_parts(self):
        """
        Check that splitting produced exactly the parts sample.tar.gz.0,
        sample.tar.gz.1 and sample.tar.gz.2 (one per archived file).
        """
        for index in range(3):
            assert os.path.exists("sample.tar.gz.%d" % index)
        assert not os.path.exists("sample.tar.gz.3")  # nothing else

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------

    def test_zcat_extract_concat(self):
        """
        Create a tar file with only one file inside, using concat compression
        mode. Then decompress it with zcat and untar it with gnu tar.
        """
        # create the content of the file to compress and remember its checksum
        digests = self._create_files([("big", 50000)])

        self._write_archive(["big"], format=GNU_FORMAT)
        self._remove_files(["big"])

        # extract with the standard tools; their exit statuses are not
        # checked, a failed command shows up in the assertions below
        os.system("zcat sample.tar.gz > sample.tar")
        os.system("tar xf sample.tar")
        self._assert_restored(digests)

    def test_concat_extract(self):
        """
        Create a tar file with only one file inside, using concat compression
        mode, then extract it again with the deltatar tarfile module.
        """
        digests = self._create_files([("big", 50000)])

        self._write_archive(["big"])
        self._remove_files(["big"])

        self._extract_all(self._ARCHIVE, "r#gz")
        self._assert_restored(digests)

    def test_concat_extract_fileobj(self):
        """
        Create a tar file with only one file inside, using concat compression
        mode, then extract it with the deltatar tarfile module through the
        fileobj parameter, starting at the member's offset.
        """
        digests = self._create_files([("big", 50000)])

        pos = self._write_archive(["big"], offset_after="big")
        self._remove_files(["big"])

        self._extract_member_at(pos)
        self._assert_restored(digests)

    def test_concat_extract_one_fileobj(self):
        """
        Create a tar file with multiple files inside, using concat compression
        mode, then extract a single one of them with the deltatar tarfile
        module through the fileobj parameter.
        """
        digests = self._create_files(self._THREE_FILES)

        # remember where "small" starts inside the archive
        pos = self._write_archive(["big", "small", "small2"],
                                  offset_after="small")
        self._remove_files(["big", "small", "small2"])

        # extract only the "small" file
        self._extract_member_at(pos)
        self._assert_restored(digests, names=["small"])

        # we didn't extract the other files
        assert not os.path.exists("big")
        assert not os.path.exists("small2")

    def test_concat_extract_one_fileobj_multivol(self):
        """
        Create a tar file with multiple files inside and multiple volumes,
        using concat compression mode, then extract a file spanning two
        volumes with the deltatar tarfile module through the fileobj
        parameter.
        """
        digests = self._create_files([("small", 100000), ("big", 1200000)])

        # remember where "big" starts inside the first volume
        pos = self._write_archive(["small", "big"], offset_after="big",
                                  max_volume_size=1000000,
                                  new_volume_handler=new_volume_handler)
        self._remove_files(["big", "small"])

        def new_volume_handler_fo(tarobj, base_name, volume_number):
            '''
            Handles the new volumes, ignoring base_name as it'll be None because
            we'll be using a seek fileobj.
            '''
            volume_path = "sample.tar.gz.%d" % volume_number
            tarobj.open_volume(volume_path)

        # extract only the "big" file
        self._extract_member_at(pos, new_volume_handler=new_volume_handler_fo)
        self._assert_restored(digests, names=["big"])

        # we didn't extract the other files
        assert not os.path.exists("small")

    def test_multiple_files_zcat_extract(self):
        """
        Create a tar file with multiple files inside, using concat
        compression mode, then decompress it with zcat and extract the
        resulting plain tar file.
        """
        digests = self._create_files(self._THREE_FILES)

        self._write_archive(["big", "small", "small2"])
        self._remove_files(["big", "small", "small2"])

        # decompress outside of python, then extract and check output
        os.system("zcat sample.tar.gz > sample.tar")
        self._extract_all("sample.tar", "r")
        self._assert_restored(digests)

    def test_multiple_files_concat_extract(self):
        """
        Create a tar file with multiple files inside, using concat
        compression mode, then extract it in concat mode as well.
        """
        digests = self._create_files(self._THREE_FILES)

        self._write_archive(["big", "small", "small2"])
        self._remove_files(["big", "small", "small2"])

        # extract and check output
        self._extract_all(self._ARCHIVE, "r#gz")
        self._assert_restored(digests)

    def test_multivol_gzip_concat_extract(self):
        """
        Test multivol tarball with concat compression.
        """
        digests = self._create_files([("big", 50000), ("big2", 10200),
                                      ("small", 100), ("small2", 354)])

        # create the tar file with volumes
        names = ["big", "big2", "small", "small2"]
        self._write_archive(names, max_volume_size=20000,
                            new_volume_handler=new_volume_handler)
        self._remove_files(names)

        # extract and check output
        self._extract_all(self._ARCHIVE, "r#gz",
                          new_volume_handler=new_volume_handler)
        self._assert_restored(digests)

    def test_multiple_files_rescue_extract(self):
        """
        Use the filesplit utility to split the archive at the gzip magic
        bytes into parts that can be decompressed and "untarred"
        individually, which the concat gzip tar format makes possible.
        """
        digests = self._create_files(self._THREE_FILES)

        self._write_archive(["big", "small", "small2"])
        self._remove_files(["big", "small", "small2"])

        filesplit.split_file(GZ_MAGIC_BYTES, "sample.tar.gz.", "sample.tar.gz")
        self._assert_three_split_parts()

        # extract every part on its own and check output
        for index in range(3):
            self._extract_all("sample.tar.gz.%d" % index, "r|gz")
        self._assert_restored(digests)

    def test_multiple_files_rescue_extract_gnu(self):
        """
        Use the filesplit utility to split the archive at the gzip magic
        bytes into parts that can be decompressed and "untarred"
        individually, which the concat gzip tar format makes possible. Both
        the split and the extraction are done with command line tools
        (filesplit.py, gzip and gnu tar).
        """
        digests = self._create_files(self._THREE_FILES)

        self._write_archive(["big", "small", "small2"])
        self._remove_files(["big", "small", "small2"])

        # split using the command line this time
        # NOTE(review): $'...' is bash-style quoting while os.system() runs
        # the command through the system shell -- confirm that shell
        # supports it on every platform the tests run on
        os.system("python3 filesplit.py -s $'\\x1f\\x8b' -p sample.tar.gz. sample.tar.gz")
        self._assert_three_split_parts()

        # extract every part with gzip and tar, then check output
        for index in range(3):
            os.system("gzip -cd sample.tar.gz.%d > sample.%d.tar" % (index, index))
            os.system("tar xf sample.%d.tar" % index)
        self._assert_restored(digests)

    def test_multiple_files_rescue_extract_broken(self):
        """
        Use the filesplit utility to split the archive at the gzip magic
        bytes into parts that can be decompressed and "untarred"
        individually, which the concat gzip tar format makes possible. In
        this case we simulate that one of the files is corrupted; the rest
        must still extract just fine.
        """
        digests = self._create_files(self._THREE_FILES)

        self._write_archive(["big", "small", "small2"])

        # overwrite stuff in the middle of the big file
        with open('sample.tar.gz', 'r+b') as archive:
            archive.seek(100)
            archive.write(b"breaking things")

        self._remove_files(["big", "small", "small2"])

        # equivalent to $ python filesplit.py -s $'\x1f\x8b' -p sample.tar.gz. sample.tar.gz
        filesplit.split_file(GZ_MAGIC_BYTES, "sample.tar.gz.", "sample.tar.gz")
        self._assert_three_split_parts()

        # extract and check output
        for index in range(3):
            try:
                self._extract_all("sample.tar.gz.%d" % index, "r|gz")
            except Exception as exc:
                # part 0 holds the big file, which doesn't extract well
                # because it's corrupted; any other failure is a real error
                if index != 0:
                    raise Exception("Error extracting a tar.gz not related to the broken 'big' file") from exc

        # "big" is expected to be damaged, only the others are verified
        self._assert_restored(digests,
                              names=[name for name in digests if name != "big"])