bpo-32713: Fix tarfile.itn for large/negative float values. (GH-5434)
[python-delta-tar] / rescue_tar.py
CommitLineData
6b2fa38f 1#!/usr/bin/env python3
0112ba0d 2
866c42e6
DGM
3# Copyright (C) 2013 Intra2net AG
4#
494b38aa
DGM
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; either version 3 of the License, or
866c42e6
DGM
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
494b38aa 13# GNU Lesser General Public License for more details.
866c42e6
DGM
14#
15# You should have received a copy of the GNU General Public License
494b38aa
DGM
16# along with this program. If not, see
17# <http://www.gnu.org/licenses/lgpl-3.0.html>
866c42e6
DGM
18
19
0112ba0d
ERE
20import argparse
21import os
838ffb19 22import sys
0112ba0d
ERE
23import tempfile
24from functools import partial
25
26from deltatar import tarfile
838ffb19 27from deltatar import crypto
0112ba0d
ERE
28import filesplit
29
def rescue(tar_files, rescue_dir=None, password=None):
    '''
    Rescues a multivolume tarfile, extracting as much of it as possible.

    The file name extension of the first entry in ``tar_files`` selects the
    format. ``.tar.gz`` and ``.tar.gz.pdtcrypt`` inputs are first handed to
    ``filesplit.split_file`` together with the matching magic bytes, and the
    resulting chunk files are then extracted one by one. Any other extension
    is opened directly in mode ``"r"``. The input is assumed to be a
    multivolume tar.

    Parameters:
        tar_files: a path, or a list of paths, of the volumes to rescue.
        rescue_dir: directory in which chunk files are created. Defaults to
            the directory part of the first tar file.
        password: password used for decryption; mandatory when the first
            file ends in ``.tar.gz.pdtcrypt``.

    Returns:
        0 if every file was extracted without error, -1 if the tarball is
        encrypted but no password was given or if at least one file failed
        to extract.

    Raises:
        Exception: if ``tar_files`` is neither a string nor a list of
            strings, or if one of the listed files does not exist.
    '''
    # accept a single path as a convenience
    if isinstance(tar_files, str):
        tar_files = [tar_files]

    if not isinstance(tar_files, list):
        raise Exception("tar_files must be a list")

    for f in tar_files:
        if not isinstance(f, str):
            raise Exception("tar_files must be a list of strings")
        if not os.path.exists(f):
            raise Exception("tar file '%s' doesn't exist" % f)

    # setup rescue_dir: chunk files go next to the first volume unless the
    # caller picked a directory
    if rescue_dir is None:
        rescue_dir = os.path.dirname(tar_files[0])

    # autodetect file type by extension
    first_tar_file = tar_files[0]

    mode = "r"
    decr = None
    separator = None
    if first_tar_file.endswith(".tar.gz"):
        mode = "r#gz"
        separator = tarfile.GZ_MAGIC_BYTES
    elif first_tar_file.endswith(".tar.gz.pdtcrypt"):
        if password is None:
            print("ERROR: tarball is encrypted but no password given",
                  file=sys.stderr)
            return -1
        mode = "r#gz"
        decr = crypto.Decrypt(password=password)
        separator = crypto.PDTCRYPT_HDR_MAGIC

    base_name = os.path.basename(first_tar_file)
    extract_files = tar_files

    # num is the number of files created so far in rescue mode. It is used to
    # name those files when creating them. We put num in an object so that it
    # is referenced instead of copied inside the new_gz partial.
    context = dict(num=0)

    # divide in compressed tar block files if it's r#gz
    if mode == "r#gz":
        extract_files = []

        # function used to create each chunk file; it records the path so
        # that the chunk is extracted later on
        def new_gz(context, extract_files, prefix, i):
            path = "%s.%d" % (prefix, context['num'])
            extract_files.append(path)
            context['num'] += 1
            return open(path, 'wb')
        new_gz = partial(new_gz, context, extract_files)

        # split in compressed or encrypted chunks, respectively
        for f in tar_files:
            filesplit.split_file(separator,
                                 os.path.join(rescue_dir, base_name),
                                 f,
                                 new_gz)

    # includes volumes already extracted with new_volume_handler
    already_extracted_vols = []

    # NOTE(review): the next_num parameter is overwritten before it is read,
    # and only already_extracted_vols is bound below. Confirm that TarFile
    # invokes the handler with four positional arguments; if it passes only
    # (tarobj, base_name, volume_number) the arguments are misaligned.
    def new_volume_handler(already_extracted_vols, next_num, tarobj, base_name, volume_number):
        '''
        Handles the new volumes when extracting
        '''

        # handle the special case where the first file is whatever.tar.gz and
        # the second is whatever.tar.gz.0
        base_name_split = base_name.split('.')
        next_num = 0
        try:
            next_num = int(base_name_split[-1]) + 1
            base_name = ".".join(base_name_split[:-1])
        except ValueError:
            # no numeric suffix: this is the first volume, continue with .0
            pass

        volume_path = "%s.%d" % (base_name, next_num)
        already_extracted_vols.append(volume_path)
        tarobj.open_volume(volume_path, encryption=decr)

    new_volume_handler = partial(new_volume_handler, already_extracted_vols)

    # extract files, as much as possible
    errs = 0
    for f in extract_files:
        # skip volumes that were consumed as a continuation of an earlier one
        if f in already_extracted_vols:
            continue
        try:
            tarobj = tarfile.TarFile.open(f,
                                          mode=mode,
                                          encryption=decr,
                                          new_volume_handler=new_volume_handler)
            try:
                tarobj.extractall()
            finally:
                # release the archive even when extraction fails half-way
                tarobj.close()
        except Exception as exn:
            # best effort: report the failure and carry on with the next file
            print("ERROR: error extracting file “%s” (%s)" % (f, exn),
                  file=sys.stderr)
            errs += 1

    if errs > 0:
        print("ERROR: encountered %d errors extracting %s"
              % (errs, first_tar_file), file=sys.stderr)
        return -1

    return 0
0112ba0d
ERE
145
if __name__ == "__main__":
    # Command line entry point: parse the arguments, run the rescue and use
    # its return value (0 on success, -1 on failure) as the exit status.
    parser = argparse.ArgumentParser()

    # rescue() falls back to the directory of the first tar file when no
    # rescue directory is given, so the help text states exactly that.
    parser.add_argument("--rescue-dir", help="directory where rescue files "
                        "should be created. Defaults to the directory of the "
                        "first tar file")
    parser.add_argument("--password",
                        help="password; mandatory for encrypted tarballs")
    parser.add_argument("tar_files", nargs="+", help="list of files of a "
                        "multitar file to rescue. Assumes format first.extension "
                        "second.extension.0 third.extension.1 ...")

    args = parser.parse_args()
    sys.exit(rescue(tar_files=args.tar_files,
                    rescue_dir=args.rescue_dir,
                    password=args.password))