Using aes128 for encryption
[python-delta-tar] / rescue_tar.py
CommitLineData
0112ba0d
ERE
1#!/usr/bin/env python
2
3import argparse
4import os
5import tempfile
6from functools import partial
7
8from deltatar import tarfile
9import filesplit
10
def rescue(tar_files, rescue_dir=None):
    '''
    Rescues a multivolume tarfile on a best-effort basis.

    The format is detected from the file name extension of the first entry
    of tar_files: ".tar.gz" is opened with mode "r#gz" and ".tar" with mode
    "r". The archive is assumed to be a multivolume tar.

    For "r#gz" every input file is first split at each gzip magic number
    into chunk files named "<rescue_dir>/<name of first file>.<n>", and each
    chunk is then opened and extracted on its own. For "r" the given files
    are opened directly. A file that cannot be opened or extracted is
    skipped so that the remaining ones are still tried.

    Arguments:
    tar_files  -- a path, or a list of paths, of the files to rescue. The
                  first entry decides the format and the chunk base name.
    rescue_dir -- directory where the chunk files are written (only used
                  for ".tar.gz"). Defaults to the directory of the first
                  entry of tar_files. It is created if it does not exist.

    Raises Exception if tar_files is not a string or a non-empty list of
    strings, if any of the files does not exist, or if the extension of the
    first file is neither ".tar" nor ".tar.gz".

    Extraction uses extractall() without a target path (presumably the
    current working directory -- confirm against deltatar.tarfile).
    '''
    # basestring only exists on Python 2; fall back to str elsewhere so that
    # the validation below does not die with a NameError.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(tar_files, string_types):
        tar_files = [tar_files]

    if not isinstance(tar_files, list):
        raise Exception("tar_files must be a list")

    # an empty list used to surface as a bare IndexError further down
    if not tar_files:
        raise Exception("tar_files must not be empty")

    for f in tar_files:
        if not isinstance(f, string_types):
            raise Exception("tar_files must be a list of strings")
        if not os.path.exists(f):
            raise Exception("tar file '%s' doesn't exist" % f)

    first_tar_file = tar_files[0]

    # setup rescue_dir: chunk files go next to the first file by default
    if rescue_dir is None:
        rescue_dir = os.path.dirname(first_tar_file)

    # autodetect file type by extension
    if first_tar_file.endswith(".tar.gz"):
        mode = "r#gz"
    elif first_tar_file.endswith(".tar"):
        mode = "r"
    else:
        raise Exception("unsupported extension in '%s', expected .tar or "
                        ".tar.gz" % first_tar_file)

    base_name = os.path.basename(first_tar_file)
    extract_files = tar_files

    # num is the number of files used in rescue mode. Used to name those
    # files when creating them. It lives in a dict so that new_gz shares and
    # updates the same counter instead of working on a copy.
    context = dict(num=0)

    # divide in compressed tar block files if it's r#gz
    if mode == "r#gz":
        extract_files = []

        # an empty rescue_dir means "current directory", nothing to create
        if rescue_dir and not os.path.isdir(rescue_dir):
            os.makedirs(rescue_dir)

        def new_gz(context, extract_files, prefix, i):
            '''
            Creates the next chunk file "<prefix>.<num>", records its path
            in extract_files and returns it opened for writing. The
            argument i is part of the callback signature and is not used.
            '''
            path = "%s.%d" % (prefix, context['num'])
            extract_files.append(path)
            context['num'] += 1
            # chunks hold raw gzip data, so they must be written in binary
            return open(path, 'wb')
        new_gz = partial(new_gz, context, extract_files)

        # split in compressed chunks, cutting at the gzip magic number
        for f in tar_files:
            filesplit.split_file(b'\x1f\x8b',
                                 os.path.join(rescue_dir, base_name), f,
                                 new_gz)

    # includes volumes already extracted with new_volume_handler
    already_extracted_vols = []

    def new_volume_handler(already_extracted_vols, next_num, tarobj,
                           base_name, volume_number):
        '''
        Handles the new volumes when extracting: works out the path of the
        next volume from base_name, remembers it in already_extracted_vols
        and opens it on tarobj.
        '''
        # NOTE(review): only already_extracted_vols is bound through partial
        # below, so the caller has to pass next_num, tarobj, base_name and
        # volume_number. The incoming next_num is overwritten right away --
        # confirm this arity against the tarfile handler calling convention.

        # handle the special case where the first file is whatever.tar.gz
        # and the second is whatever.tar.gz.0
        base_name_split = base_name.split('.')
        next_num = 0
        try:
            next_num = int(base_name_split[-1]) + 1
            base_name = ".".join(base_name_split[:-1])
        except ValueError:
            # no numeric suffix: this is the first volume, next one is .0
            pass

        volume_path = "%s.%d" % (base_name, next_num)
        already_extracted_vols.append(volume_path)
        tarobj.open_volume(volume_path)

    new_volume_handler = partial(new_volume_handler, already_extracted_vols)

    # extract files, as much as possible
    for f in extract_files:
        # already consumed as a follow-up volume of a previous file
        if f in already_extracted_vols:
            continue
        tarobj = None
        try:
            tarobj = tarfile.TarFile.open(
                f, mode=mode, new_volume_handler=new_volume_handler)
            tarobj.extractall()
        except Exception:
            # best effort: a damaged file must not stop the rescue of the
            # others. KeyboardInterrupt and SystemExit still propagate.
            pass
        finally:
            if tarobj is not None:
                try:
                    tarobj.close()
                except Exception:
                    # closing a damaged archive may fail as well
                    pass
if __name__ == "__main__":
    # Command-line entry point: collects the files to rescue and the
    # optional rescue directory, then hands them to rescue().
    parser = argparse.ArgumentParser(
        description="Rescues a multivolume tar file, extracting as much as "
        "possible.")

    # rescue() falls back to the directory of the first tar file when no
    # rescue_dir is given, so the help text states exactly that.
    parser.add_argument("--rescue_dir", help="directory where rescue files "
                        "should be created. Directory of the first tar file "
                        "by default")
    parser.add_argument("tar_files", nargs="+", help="list of files of a "
                        "multitar file to rescue. Assumes format "
                        "first.extension second.extension.0 "
                        "third.extension.1 ...")

    args = parser.parse_args()
    rescue(tar_files=args.tar_files, rescue_dir=args.rescue_dir)