Commit | Line | Data |
---|---|---|
0112ba0d ERE |
1 | #!/usr/bin/env python |
2 | ||
866c42e6 DGM |
3 | # Copyright (C) 2013 Intra2net AG |
4 | # | |
5 | # This program is free software: you can redistribute it and/or modify | |
6 | # it under the terms of the GNU General Public License as published by | |
7 | # the Free Software Foundation, either version 3 of the License, or | |
8 | # (at your option) any later version. | |
9 | # | |
10 | # This program is distributed in the hope that it will be useful, | |
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | # GNU General Public License for more details. | |
14 | # | |
15 | # You should have received a copy of the GNU General Public License | |
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
17 | ||
18 | ||
0112ba0d ERE |
19 | import argparse |
20 | import os | |
21 | import tempfile | |
22 | from functools import partial | |
23 | ||
24 | from deltatar import tarfile | |
25 | import filesplit | |
26 | ||
def rescue(tar_files, rescue_dir=None):
    '''
    Rescues as much as possible from a multivolume tarfile. Checks the file
    name extension of the first volume to detect the format (compression,
    etc). Assumes it to be multivolume tar.

    For ".tar.gz" archives (mode "r#gz") every input file is first handed to
    filesplit.split_file with the gzip magic bytes as separator, and the
    resulting chunk files are the ones that get extracted, one by one. For
    ".tar" archives (mode "r") the given files are extracted directly.

    tar_files:  path, or list of paths, of the volumes to rescue.
    rescue_dir: directory where the chunk files are created. Defaults to the
                directory of the first volume.

    Raises Exception if tar_files is not a string or a non-empty list of
    paths of existing files, or if the extension of the first volume is
    neither ".tar" nor ".tar.gz". Errors raised while extracting a single
    file are ignored on purpose so that the remaining files are still tried.
    '''
    # basestring only exists in python 2; fall back to str elsewhere
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # validate input
    if isinstance(tar_files, string_types):
        tar_files = [tar_files]

    if not isinstance(tar_files, list):
        raise Exception("tar_files must be a list")

    if not tar_files:
        raise Exception("tar_files must not be empty")

    for f in tar_files:
        if not isinstance(f, string_types):
            raise Exception("tar_files must be a list of strings")
        if not os.path.exists(f):
            raise Exception("tar file '%s' doesn't exist" % f)

    first_tar_file = tar_files[0]

    # setup rescue_dir
    if rescue_dir is None:
        rescue_dir = os.path.dirname(first_tar_file)

    # autodetect file type by extension
    if first_tar_file.endswith(".tar.gz"):
        mode = "r#gz"
    elif first_tar_file.endswith(".tar"):
        mode = "r"
    else:
        raise Exception("unsupported file extension in '%s', expected "
                        "'.tar' or '.tar.gz'" % first_tar_file)

    extract_files = tar_files

    # divide in compressed tar block files if it's r#gz
    if mode == "r#gz":
        extract_files = []
        chunk_prefix = os.path.join(rescue_dir,
                                    os.path.basename(first_tar_file))

        def new_gz(prefix, i):
            '''
            Creates each chunk file. Chunks are numbered consecutively across
            all the input files, so the number of chunks created so far is
            used instead of the index given by the splitter.
            '''
            path = "%s.%d" % (prefix, len(extract_files))
            extract_files.append(path)
            # NOTE(review): chunks hold gzip data, 'wb' looks more appropriate
            # - confirm against what filesplit.split_file writes
            return open(path, 'w')

        # split in compressed chunks
        for f in tar_files:
            filesplit.split_file('\x1f\x8b', chunk_prefix, f, new_gz)

    # includes volumes already extracted with new_volume_handler
    already_extracted_vols = []

    def new_volume_handler(tarobj, base_name, volume_number):
        '''
        Handles the new volumes when extracting.

        NOTE(review): takes exactly the (tarobj, base_name, volume_number)
        arguments, assuming that is how tarfile invokes the callback - confirm
        against deltatar's tarfile.
        '''
        # handle the special case where the first file is whatever.tar.gz and
        # the second is whatever.tar.gz.0
        name_parts = base_name.split('.')
        next_num = 0
        try:
            next_num = int(name_parts[-1]) + 1
            base_name = ".".join(name_parts[:-1])
        except ValueError:
            # no numeric suffix: this is the first volume, next one is ".0"
            pass

        volume_path = "%s.%d" % (base_name, next_num)
        already_extracted_vols.append(volume_path)
        tarobj.open_volume(volume_path)

    # extract files, as much as possible
    for f in extract_files:
        if f in already_extracted_vols:
            continue

        tarobj = None
        try:
            tarobj = tarfile.TarFile.open(f, mode=mode,
                new_volume_handler=new_volume_handler)
            tarobj.extractall()
        except Exception:
            # best effort: a broken file must not prevent trying the others.
            # KeyboardInterrupt and SystemExit are not caught here.
            pass
        finally:
            if tarobj is not None:
                try:
                    tarobj.close()
                except Exception:
                    pass
116 | ||
if __name__ == "__main__":
    # command line entry point: parses the volumes to rescue and the optional
    # directory for the rescue files, then runs rescue() on them
    parser = argparse.ArgumentParser()

    # rescue() falls back to the directory of the first tar file when no
    # rescue_dir is given, so the help text says so
    parser.add_argument("--rescue_dir", help="directory where rescue files "
        "should be created. Directory of the first tar file by default")
    parser.add_argument("tar_files", nargs="+", help="list of files of a "
        "multitar file to rescue. Assumes format first.extension "
        "second.extension.0 third.extension.1 ...")

    args = parser.parse_args()
    rescue(tar_files=args.tar_files, rescue_dir=args.rescue_dir)