ignore the PowmInsecureWarning warning given by libgmp4 because it doesn't affect...
[python-delta-tar] / rescue_tar.py
CommitLineData
0112ba0d
ERE
1#!/usr/bin/env python
2
866c42e6
DGM
3# Copyright (C) 2013 Intra2net AG
4#
494b38aa
DGM
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; either version 3 of the License, or
866c42e6
DGM
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
494b38aa 13# GNU Lesser General Public License for more details.
866c42e6
DGM
14#
15# You should have received a copy of the GNU Lesser General Public License
494b38aa
DGM
16# along with this program. If not, see
17# <http://www.gnu.org/licenses/lgpl-3.0.html>
866c42e6
DGM
18
19
0112ba0d
ERE
20import argparse
21import os
22import tempfile
23from functools import partial
24
25from deltatar import tarfile
26import filesplit
27
def rescue(tar_files, rescue_dir=None):
    '''
    Rescues as much as possible from a multivolume tarfile.

    Checks the file name extension of the first file to detect the format:
    ".tar.gz" is read with mode "r#gz" and ".tar" with mode "r". The input
    is assumed to be a multivolume tar.

    For "r#gz" every input file is first split at each gzip magic marker
    into chunk files named "<rescue_dir>/<name of first file>.<n>", and
    those chunks are what gets extracted. Extraction is best-effort: a file
    that cannot be opened or extracted is skipped. Note that members are
    extracted by extractall() without a target path, not into rescue_dir.

    tar_files: a path, or a list of paths, of the volumes to rescue.
    rescue_dir: directory where the chunk files are created. Defaults to
        the directory of the first tar file.

    Raises Exception if tar_files is not a non-empty list of paths to
    existing files, or if the first file has an unsupported extension.
    '''
    # basestring only exists on Python 2; fall back to str elsewhere
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(tar_files, string_types):
        tar_files = [tar_files]

    if not isinstance(tar_files, list):
        raise Exception("tar_files must be a list")

    if not tar_files:
        raise Exception("tar_files must not be empty")

    for f in tar_files:
        if not isinstance(f, string_types):
            raise Exception("tar_files must be a list of strings")
        if not os.path.exists(f):
            raise Exception("tar file '%s' doesn't exist" % f)

    first_tar_file = tar_files[0]

    # setup rescue_dir. The original code also had an unreachable
    # "elif rescue_dir is None" branch creating a temporary directory; the
    # effective default has always been the directory of the first file.
    if rescue_dir is None:
        rescue_dir = os.path.dirname(first_tar_file)

    # autodetect file type by extension
    if first_tar_file.endswith(".tar.gz"):
        mode = "r#gz"
    elif first_tar_file.endswith(".tar"):
        mode = "r"
    else:
        # fail with a clear message instead of a NameError further down
        raise Exception("unsupported file extension in '%s': expected "
                        "'.tar' or '.tar.gz'" % first_tar_file)

    base_name = os.path.basename(first_tar_file)
    extract_files = tar_files

    # divide in compressed tar block files if it's r#gz
    if mode == "r#gz":
        extract_files = []

        def new_gz(prefix, i):
            '''
            Creates the next chunk file. Chunks are numbered consecutively
            across all input files, so the number of chunks created so far
            is used as the suffix.
            '''
            path = "%s.%d" % (prefix, len(extract_files))
            extract_files.append(path)
            # chunks hold raw gzip data, so they are written in binary mode.
            # NOTE(review): presumably filesplit closes the returned file
            # object - confirm against filesplit.split_file
            return open(path, 'wb')

        # split in compressed chunks, cutting at the gzip magic number.
        # NOTE(review): the separator is given as bytes, which is identical
        # to the original str on Python 2 - confirm filesplit.split_file
        # reads its input in binary mode on Python 3
        for f in tar_files:
            filesplit.split_file(b'\x1f\x8b',
                os.path.join(rescue_dir, base_name), f, new_gz)

    # includes volumes already extracted with new_volume_handler
    already_extracted_vols = []

    def new_volume_handler(*args):
        '''
        Handles the new volumes when extracting.

        The last three positional arguments are taken as
        (tarobj, base_name, volume_number). The original signature had an
        extra leading "next_num" parameter whose value was overwritten
        before use, so both calling conventions are accepted here.
        '''
        tarobj, volume_base_name = args[-3], args[-2]

        # handle the special case where the first file is whatever.tar.gz
        # and the second is whatever.tar.gz.0
        name_parts = volume_base_name.split('.')
        next_num = 0
        try:
            next_num = int(name_parts[-1]) + 1
            volume_base_name = ".".join(name_parts[:-1])
        except ValueError:
            # no numeric suffix: this is the first volume, next one is .0
            pass

        volume_path = "%s.%d" % (volume_base_name, next_num)
        already_extracted_vols.append(volume_path)
        tarobj.open_volume(volume_path)

    # extract files, as much as possible
    for f in extract_files:
        # volumes consumed while extracting a previous file are skipped
        if f in already_extracted_vols:
            continue
        try:
            tarobj = tarfile.TarFile.open(f, mode=mode,
                new_volume_handler=new_volume_handler)
            try:
                tarobj.extractall()
            finally:
                tarobj.close()
        except Exception:
            # best-effort rescue: a damaged file must not stop the others,
            # but KeyboardInterrupt and SystemExit are allowed to propagate
            continue
if __name__ == "__main__":
    # Command line entry point: parses the list of volumes and the optional
    # rescue directory, then runs the rescue.
    parser = argparse.ArgumentParser()

    # when --rescue_dir is omitted, None is passed on and rescue() falls
    # back to the directory of the first tar file
    parser.add_argument("--rescue_dir", help="directory where rescue files "
        "should be created. Directory of the first tar file by default")
    parser.add_argument("tar_files", nargs="+", help="list of files of a "
        "multitar file to rescue. Assumes format first.extension "
        "second.extension.0 third.extension.1 ...")

    args = parser.parse_args()
    rescue(tar_files=args.tar_files, rescue_dir=args.rescue_dir)