| 1 | #!/usr/bin/env python3 |
| 2 | |
| 3 | # Copyright (C) 2013 Intra2net AG |
| 4 | # |
| 5 | # This program is free software; you can redistribute it and/or modify |
| 6 | # it under the terms of the GNU Lesser General Public License as published |
| 7 | # by the Free Software Foundation; either version 3 of the License, or |
| 8 | # (at your option) any later version. |
| 9 | # |
| 10 | # This program is distributed in the hope that it will be useful, |
| 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | # GNU Lesser General Public License for more details. |
| 14 | # |
| 15 | # You should have received a copy of the GNU General Public License |
| 16 | # along with this program. If not, see |
| 17 | # <http://www.gnu.org/licenses/lgpl-3.0.html> |
| 18 | |
| 19 | |
| 20 | import argparse |
| 21 | import os |
| 22 | import sys |
| 23 | import tempfile |
| 24 | from functools import partial |
| 25 | |
| 26 | from deltatar import tarfile |
| 27 | from deltatar import crypto |
| 28 | import filesplit |
| 29 | |
def rescue(tar_files, rescue_dir=None, password=None):
    '''
    Rescues a multivolume tarfile, extracting as many members as possible.

    The format is detected from the file name extension of the first volume:

    - ``.tar.gz``: every volume is split with ``filesplit.split_file`` using
      ``tarfile.GZ_MAGIC_BYTES`` as separator and the resulting chunks are
      opened with mode ``r#gz``;
    - ``.tar.gz.pdtcrypt``: same, but split at ``crypto.PDTCRYPT_HDR_MAGIC``
      and opened with a ``crypto.Decrypt`` object built from ``password``;
    - anything else: the volumes are opened as they are, with mode ``r``.

    Assumes the input to be a multivolume tar.

    :param tar_files: path, or list of paths, of the volumes to rescue
    :param rescue_dir: directory where the chunk files are written; defaults
                       to the directory of the first volume
    :param password: password, mandatory for encrypted tarballs
    :returns: 0 on success; -1 if an encrypted tarball was given without a
              password or if at least one file could not be extracted
    :raises TypeError: tar_files is neither a string nor a list of strings
    :raises ValueError: tar_files is an empty list
    :raises FileNotFoundError: one of the given volumes does not exist
    '''
    # normalize and validate the list of volumes
    if isinstance(tar_files, str):
        tar_files = [tar_files]

    if not isinstance(tar_files, list):
        raise TypeError("tar_files must be a list")
    if not tar_files:
        # without this check the first tar_files[0] access below would fail
        # with a bare IndexError
        raise ValueError("tar_files must not be empty")

    for f in tar_files:
        if not isinstance(f, str):
            raise TypeError("tar_files must be a list of strings")
        if not os.path.exists(f):
            raise FileNotFoundError("tar file '%s' doesn't exist" % f)

    first_tar_file = tar_files[0]

    # setup rescue_dir: chunk files go next to the first volume by default
    if rescue_dir is None:
        rescue_dir = os.path.dirname(first_tar_file)

    # autodetect file type by extension
    mode = "r"
    decr = None
    separator = None
    if first_tar_file.endswith(".tar.gz"):
        mode = "r#gz"
        separator = tarfile.GZ_MAGIC_BYTES
    elif first_tar_file.endswith(".tar.gz.pdtcrypt"):
        if password is None:
            print("ERROR: tarball is encrypted but no password given",
                  file=sys.stderr)
            return -1
        mode = "r#gz"
        decr = crypto.Decrypt(password=password)
        separator = crypto.PDTCRYPT_HDR_MAGIC

    extract_files = tar_files

    # divide in compressed tar block files if it's r#gz
    if mode == "r#gz":
        # rescue_dir is '' when the first volume is a bare file name in the
        # current directory; there is nothing to create in that case
        if rescue_dir:
            os.makedirs(rescue_dir, exist_ok=True)

        extract_files = []
        chunk_prefix = os.path.join(rescue_dir,
                                    os.path.basename(first_tar_file))

        def new_gz(prefix, i):
            '''
            Creates the next chunk file. Chunks are numbered consecutively
            across all input volumes, so the number of chunks registered so
            far is used as suffix.
            '''
            path = "%s.%d" % (prefix, len(extract_files))
            extract_files.append(path)
            return open(path, 'wb')

        # split in compressed or encrypted chunks, respectively
        for f in tar_files:
            filesplit.split_file(separator, chunk_prefix, f, new_gz)

    # includes volumes already extracted with new_volume_handler
    already_extracted_vols = []

    # NOTE(review): the handler now takes exactly (tarobj, base_name,
    # volume_number); the former extra ``next_num`` parameter was overwritten
    # before use. Confirm this matches how the tarfile module invokes it.
    def new_volume_handler(tarobj, base_name, volume_number):
        '''
        Handles the new volumes when extracting
        '''
        # handle the special case where the first file is whatever.tar.gz and
        # the second is whatever.tar.gz.0
        base_name_split = base_name.split('.')
        next_num = 0
        try:
            next_num = int(base_name_split[-1]) + 1
            base_name = ".".join(base_name_split[:-1])
        except ValueError:
            # no numeric suffix: this is the first volume, continue with .0
            pass

        volume_path = "%s.%d" % (base_name, next_num)
        already_extracted_vols.append(volume_path)
        tarobj.open_volume(volume_path, encryption=decr)

    # extract files, as much as possible
    errs = 0
    for f in extract_files:
        # skip files already consumed as a follow-up volume of an earlier one
        if f in already_extracted_vols:
            continue
        try:
            tarobj = tarfile.TarFile.open(
                f,
                mode=mode,
                encryption=decr,
                new_volume_handler=new_volume_handler)
            try:
                tarobj.extractall()
            finally:
                # release the archive even if the extraction failed midway
                tarobj.close()
        except Exception as exn:
            # best effort: report the failure and go on with the next file
            print("ERROR: error extracting file '%s' (%s)" % (f, exn),
                  file=sys.stderr)
            errs += 1

    if errs > 0:
        print("ERROR: encountered %d errors extracting %s"
              % (errs, first_tar_file), file=sys.stderr)
        return -1

    return 0
| 145 | |
if __name__ == "__main__":
    # Command line entry point: parse the arguments and hand them over to
    # rescue(), whose return value becomes the exit status of the process.
    parser = argparse.ArgumentParser()

    # rescue() falls back to the directory of the first tar file when no
    # rescue dir is given, so the help text says exactly that
    parser.add_argument("--rescue-dir", help="directory where rescue files "
                        "should be created. Directory of the first tar file "
                        "by default")
    parser.add_argument("--password",
                        help="password; mandatory for encrypted tarballs")
    parser.add_argument("tar_files", nargs="+", help="list of files of a "
                        "multitar file to rescue. Assumes format first.extension "
                        "second.extension.0 third.extension.1 ...")

    args = parser.parse_args()
    sys.exit(rescue(tar_files=args.tar_files,
                    rescue_dir=args.rescue_dir,
                    password=args.password))