Commit | Line | Data |
---|---|---|
0112ba0d ERE |
1 | #!/usr/bin/env python |
2 | ||
3 | import argparse | |
4 | import os | |
5 | import tempfile | |
6 | from functools import partial | |
7 | ||
8 | from deltatar import tarfile | |
9 | import filesplit | |
10 | ||
def rescue(tar_files, rescue_dir=None):
    '''
    Rescues a multivolume tarfile, extracting as much of it as possible.

    The format is detected from the file name extension of the first file:
    ".tar.gz" is opened with mode "r#gz" and ".tar" with mode "r". In "r#gz"
    mode every input file is first handed to filesplit.split_file together
    with the gzip magic number, producing chunk files named
    "<rescue_dir>/<basename of the first file>.<n>"; those chunks are then
    extracted instead of the original files.

    Arguments:
    tar_files  -- path, or list of paths, of the files to rescue.
    rescue_dir -- directory where the chunk files are created. Defaults to
                  the directory of the first file in tar_files.

    Raises Exception if tar_files is empty, is not a list of paths of
    existing files, or if the extension of the first file is not supported.
    Errors raised while extracting an individual file are ignored so that
    the remaining files are still tried.
    '''
    # basestring only exists in Python 2; fall back to str elsewhere
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(tar_files, string_types):
        tar_files = [tar_files]

    if not isinstance(tar_files, list):
        raise Exception("tar_files must be a list")

    if not tar_files:
        raise Exception("tar_files must not be empty")

    for f in tar_files:
        if not isinstance(f, string_types):
            raise Exception("tar_files must be a list of strings")
        if not os.path.exists(f):
            raise Exception("tar file '%s' doesn't exist" % f)

    # setup rescue_dir: chunk files go next to the first file by default
    if rescue_dir is None:
        rescue_dir = os.path.dirname(tar_files[0])

    # autodetect file type by extension
    first_tar_file = tar_files[0]
    if first_tar_file.endswith(".tar.gz"):
        mode = "r#gz"
    elif first_tar_file.endswith(".tar"):
        mode = "r"
    else:
        raise Exception("unsupported file extension in '%s', expected "
            ".tar or .tar.gz" % first_tar_file)

    base_name = os.path.basename(first_tar_file)
    extract_files = tar_files

    # divide in compressed tar block files if it's r#gz
    if mode == "r#gz":
        extract_files = []
        chunk_prefix = os.path.join(rescue_dir, base_name)

        def new_gz(prefix, i):
            '''
            Creates each chunk file. Chunks are numbered consecutively
            across all the input files, so the number of chunks created so
            far is used as the suffix instead of the per-file index i.
            '''
            path = "%s.%d" % (prefix, len(extract_files))
            extract_files.append(path)
            # chunks hold raw gzip data, so they must be written in binary
            return open(path, 'wb')

        # split in compressed chunks, using the gzip magic number
        for f in tar_files:
            filesplit.split_file(b'\x1f\x8b', chunk_prefix, f, new_gz)

    # includes volumes already extracted with new_volume_handler
    already_extracted_vols = []

    # NOTE(review): assumes deltatar invokes the handler as
    # handler(tarobj, base_name, volume_number) -- confirm against
    # deltatar.tarfile
    def new_volume_handler(tarobj, base_name, volume_number):
        '''
        Handles the new volumes when extracting
        '''
        # handle the special case where the first file is whatever.tar.gz and
        # the second is whatever.tar.gz.0
        base_name_split = base_name.split('.')
        next_num = 0
        try:
            next_num = int(base_name_split[-1]) + 1
            base_name = ".".join(base_name_split[:-1])
        except ValueError:
            # no numeric suffix: this is the first file, next one is ".0"
            pass

        volume_path = "%s.%d" % (base_name, next_num)
        already_extracted_vols.append(volume_path)
        tarobj.open_volume(volume_path)

    # extract files, as much as possible
    for f in extract_files:
        # skip the files already consumed as a continuation volume
        if f in already_extracted_vols:
            continue

        tarobj = None
        try:
            tarobj = tarfile.TarFile.open(f, mode=mode,
                new_volume_handler=new_volume_handler)
            tarobj.extractall()
        except Exception:
            # best effort: a damaged file must not stop the rescue of the
            # remaining ones. KeyboardInterrupt/SystemExit still propagate.
            pass
        finally:
            if tarobj is not None:
                try:
                    tarobj.close()
                except Exception:
                    pass
100 | ||
# Command line entry point: rescue the tar files given as arguments.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    # the help text states the default that rescue() really applies when
    # --rescue_dir is not given
    parser.add_argument("--rescue_dir", help="directory where rescue files "
        "should be created. Defaults to the directory of the first tar file")
    parser.add_argument("tar_files", nargs="+", help="list of files of a "
        "multitar file to rescue. Assumes format first.extension "
        "second.extension.0 third.extension.1 ...")

    args = parser.parse_args()
    rescue(tar_files=args.tar_files, rescue_dir=args.rescue_dir)