Fixed encryption pad treatment using random pad
[python-delta-tar] / rescue_tar.py
CommitLineData
0112ba0d
ERE
1#!/usr/bin/env python
2
866c42e6
DGM
3# Copyright (C) 2013 Intra2net AG
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18
0112ba0d
ERE
19import argparse
20import os
21import tempfile
22from functools import partial
23
24from deltatar import tarfile
25import filesplit
26
def rescue(tar_files, rescue_dir=None):
    '''
    Rescues a multivolume tarfile, extracting as much data as possible.

    The format is detected from the file name extension of the first file:
    ".tar.gz" is opened with mode "r#gz" and ".tar" with mode "r". In "r#gz"
    mode every input file is first split at each gzip magic number into
    numbered chunk files, and those chunks are what gets extracted.

    Arguments:
    tar_files  -- path, or list of paths, of the volumes to rescue.
    rescue_dir -- directory where the gzip chunk files are created. Defaults
                  to the directory of the first tar file.

    Raises Exception if tar_files is not a string or a non-empty list of
    strings, if one of the files doesn't exist, or if the extension of the
    first file is not supported. Errors raised while extracting a file are
    ignored so that the remaining files still get processed.
    '''
    # basestring only exists in Python 2; fall back to str elsewhere
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(tar_files, string_types):
        tar_files = [tar_files]

    if not isinstance(tar_files, list):
        raise Exception("tar_files must be a list")

    if not tar_files:
        raise Exception("tar_files must not be empty")

    for f in tar_files:
        if not isinstance(f, string_types):
            raise Exception("tar_files must be a list of strings")
        if not os.path.exists(f):
            raise Exception("tar file '%s' doesn't exist" % f)

    # setup rescue_dir: by default chunks are created next to the first file
    if rescue_dir is None:
        rescue_dir = os.path.dirname(tar_files[0])

    # autodetect file type by extension
    first_tar_file = tar_files[0]
    if first_tar_file.endswith(".tar.gz"):
        mode = "r#gz"
    elif first_tar_file.endswith(".tar"):
        mode = "r"
    else:
        raise Exception("unsupported file extension in '%s', expected "
                        ".tar or .tar.gz" % first_tar_file)

    base_name = os.path.basename(first_tar_file)
    extract_files = tar_files

    # divide in compressed tar block files if it's r#gz
    if mode == "r#gz":
        extract_files = []
        chunk_prefix = os.path.join(rescue_dir, base_name)

        # number of chunk files created so far. Kept in a dict so that the
        # nested function can update it, and shared between all input files
        # so that chunk names never collide
        context = dict(num=0)

        def new_gz(prefix, i):
            '''
            Creates the next chunk file and records its path. The index
            given by split_file is ignored in favour of the shared counter.
            '''
            path = "%s.%d" % (prefix, context['num'])
            extract_files.append(path)
            context['num'] += 1
            # gzip data is binary, never open it in text mode
            return open(path, 'wb')

        # split in compressed chunks, one per gzip magic number
        for f in tar_files:
            filesplit.split_file(b'\x1f\x8b', chunk_prefix, f, new_gz)

    # includes volumes already extracted with new_volume_handler
    already_extracted_vols = []

    # NOTE(review): assumes deltatar's TarFile invokes the handler as
    # handler(tarobj, base_name, volume_number). The previous signature had
    # an extra next_num parameter (overwritten right away) that shifted the
    # arguments by one -- confirm against deltatar.tarfile.
    def new_volume_handler(tarobj, base_name, volume_number):
        '''
        Handles the new volumes when extracting
        '''
        # handle the special case where the first file is whatever.tar.gz and
        # the second is whatever.tar.gz.0
        name_parts = base_name.split('.')
        try:
            next_num = int(name_parts[-1]) + 1
            base_name = ".".join(name_parts[:-1])
        except ValueError:
            next_num = 0

        volume_path = "%s.%d" % (base_name, next_num)
        already_extracted_vols.append(volume_path)
        tarobj.open_volume(volume_path)

    # extract files, as much as possible
    for f in extract_files:
        if f in already_extracted_vols:
            continue

        tarobj = None
        try:
            tarobj = tarfile.TarFile.open(f, mode=mode,
                new_volume_handler=new_volume_handler)
            tarobj.extractall()
        except Exception:
            # best effort: a damaged file must not prevent rescuing the rest.
            # KeyboardInterrupt and SystemExit are left to propagate
            pass
        finally:
            if tarobj is not None:
                try:
                    tarobj.close()
                except Exception:
                    pass
116
if __name__ == "__main__":
    # command line entry point: rescue the volumes given as arguments
    parser = argparse.ArgumentParser()

    # the help text states the default that rescue() really applies when
    # no directory is given
    parser.add_argument("--rescue_dir", help="directory where rescue files "
        "should be created. Defaults to the directory of the first tar file")
    parser.add_argument("tar_files", nargs="+", help="list of files of a "
        "multitar file to rescue. Assumes format first.extension "
        "second.extension.0 third.extension.1 ...")

    args = parser.parse_args()
    rescue(tar_files=args.tar_files, rescue_dir=args.rescue_dir)