Commit | Line | Data |
---|---|---|
6b2fa38f | 1 | #!/usr/bin/env python3 |
0112ba0d | 2 | |
866c42e6 DGM |
3 | # Copyright (C) 2013 Intra2net AG |
4 | # | |
494b38aa DGM |
5 | # This program is free software; you can redistribute it and/or modify |
6 | # it under the terms of the GNU Lesser General Public License as published | |
7 | # by the Free Software Foundation; either version 3 of the License, or | |
866c42e6 DGM |
8 | # (at your option) any later version. |
9 | # | |
10 | # This program is distributed in the hope that it will be useful, | |
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
494b38aa | 13 | # GNU Lesser General Public License for more details. |
866c42e6 DGM |
14 | # |
15 | # You should have received a copy of the GNU General Public License | |
494b38aa DGM |
16 | # along with this program. If not, see |
17 | # <http://www.gnu.org/licenses/lgpl-3.0.html> | |
866c42e6 DGM |
18 | |
19 | ||
0112ba0d ERE |
20 | import argparse |
21 | import os | |
22 | import tempfile | |
23 | from functools import partial | |
24 | ||
25 | from deltatar import tarfile | |
26 | import filesplit | |
27 | ||
def rescue(tar_files, rescue_dir=None):
    '''
    Rescues a multivolume tarfile on a best-effort basis.

    Checks the file name extension of the first volume to detect the format
    (".tar" or ".tar.gz") and assumes the archive to be multivolume tar. For
    ".tar.gz" input every volume is first split at each gzip magic header
    into chunk files named "<rescue_dir>/<name of first volume>.<n>", and
    each chunk is then extracted on its own. No target path is passed to
    extractall(). Files that cannot be opened or extracted are skipped.

    :param tar_files: path of the first volume, or a list of volume paths
    :param rescue_dir: directory where the chunk files are created. Defaults
        to the directory of the first volume.
    :raises Exception: if tar_files is neither a string nor a non-empty list
        of paths to existing files, or if the extension of the first volume
        is neither ".tar" nor ".tar.gz"
    '''
    # normalize and validate the input
    if isinstance(tar_files, str):
        tar_files = [tar_files]

    if not isinstance(tar_files, list):
        raise Exception("tar_files must be a list")

    if not tar_files:
        raise Exception("tar_files must not be empty")

    for f in tar_files:
        if not isinstance(f, str):
            raise Exception("tar_files must be a list of strings")
        if not os.path.exists(f):
            raise Exception("tar file '%s' doesn't exist" % f)

    first_tar_file = tar_files[0]

    # setup rescue_dir
    # NOTE(review): with this default the chunk files are created next to the
    # input and are named "<first volume>.<n>", which may collide with input
    # volumes following the whatever.tar.gz.0 naming scheme - confirm that
    # callers pass a separate rescue_dir in that case.
    if rescue_dir is None:
        rescue_dir = os.path.dirname(first_tar_file)

    # autodetect file type by extension
    if first_tar_file.endswith(".tar.gz"):
        mode = "r#gz"
    elif first_tar_file.endswith(".tar"):
        mode = "r"
    else:
        # fail with a clear message instead of hitting an unbound "mode" below
        raise Exception("unsupported file extension for '%s': expected "
            ".tar or .tar.gz" % first_tar_file)

    base_name = os.path.basename(first_tar_file)
    extract_files = tar_files

    # divide in compressed tar block files if it's r#gz
    if mode == "r#gz":
        extract_files = []

        # function used to create each chunk file. Chunks are numbered by the
        # amount of chunk files created so far, across all input volumes.
        def new_gz(prefix, i):
            path = "%s.%d" % (prefix, len(extract_files))
            extract_files.append(path)
            return open(path, 'wb')

        # split in compressed chunks
        chunk_prefix = os.path.join(rescue_dir, base_name)
        for f in tar_files:
            filesplit.split_file(tarfile.GZ_MAGIC_BYTES, chunk_prefix, f,
                new_gz)

    # includes volumes already extracted with new_volume_handler
    already_extracted_vols = []

    def new_volume_handler(tarobj, base_name, volume_number):
        '''
        Handles the new volumes when extracting.

        Assumes deltatar's tarfile calls the handler with (tarobj, base_name,
        volume_number) - TODO confirm against deltatar.tarfile.
        '''
        # handle the special case where the first file is whatever.tar.gz and
        # the second is whatever.tar.gz.0
        base_name_split = base_name.split('.')
        try:
            next_num = int(base_name_split[-1]) + 1
        except ValueError:
            next_num = 0
        else:
            base_name = ".".join(base_name_split[:-1])

        volume_path = "%s.%d" % (base_name, next_num)
        already_extracted_vols.append(volume_path)
        tarobj.open_volume(volume_path)

    # extract files, as much as possible
    for f in extract_files:
        # skip volumes that were already consumed as a continuation
        if f in already_extracted_vols:
            continue

        tarobj = None
        try:
            tarobj = tarfile.TarFile.open(f, mode=mode,
                new_volume_handler=new_volume_handler)
            tarobj.extractall()
        except Exception:
            # best effort: a broken chunk must not stop the rescue of the
            # remaining ones. KeyboardInterrupt/SystemExit still propagate.
            pass
        finally:
            # release the file handle even when extraction failed midway
            if tarobj is not None:
                try:
                    tarobj.close()
                except Exception:
                    pass
117 | ||
if __name__ == "__main__":
    # command line entry point: collect the volume paths and the optional
    # rescue directory, then run the rescue
    parser = argparse.ArgumentParser()

    # when --rescue_dir is not given, None is passed on and rescue() falls
    # back to the directory of the first tar file
    parser.add_argument("--rescue_dir", help="directory where rescue files "
        "should be created. Directory of the first tar file by default")
    parser.add_argument("tar_files", nargs="+", help="list of files of a "
        "multitar file to rescue. Assumes format first.extension "
        "second.extension.0 third.extension.1 ...")

    args = parser.parse_args()
    rescue(tar_files=args.tar_files, rescue_dir=args.rescue_dir)