#!/usr/bin/env python3

# Copyright (C) 2013 Intra2net AG
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see
# <http://www.gnu.org/licenses/>

import argparse
import os
import sys
import tempfile
from functools import partial

from deltatar import tarfile
from deltatar import crypto

import filesplit


def rescue(tar_files, rescue_dir=None, password=None):
    '''
    Rescues a multivolume tarfile. Checks file name extension to detect
    format (compression, etc). Assumes it to be multivolume tar.

    Parameters:
    - tar_files: path of a single volume (str) or list of volume paths (list
      of str). Every path must exist. The extension of the first entry
      decides how all of them are processed.
    - rescue_dir: directory in which the intermediate chunk files are created
      for compressed/encrypted input. Defaults to the directory of the first
      tar file.
    - password: password used to build the decryption context; required when
      the first file ends in ".tar.gz.pdtcrypt".

    Returns 0 if every file could be extracted, -1 if at least one extraction
    failed or an encrypted tarball was given without a password.

    Raises Exception if tar_files is neither a string nor a non-empty list of
    paths of existing files.
    '''
    # normalize and validate the list of input volumes
    if isinstance(tar_files, str):
        tar_files = [tar_files]

    if not isinstance(tar_files, list):
        raise Exception("tar_files must be a list")

    if not tar_files:
        # fail with a clear message instead of an IndexError further down
        raise Exception("tar_files must not be empty")

    for f in tar_files:
        if not isinstance(f, str):
            raise Exception("tar_files must be a list of strings")
        if not os.path.exists(f):
            raise Exception("tar file '%s' doesn't exist" % f)

    # setup rescue_dir: chunk files are created next to the first volume
    # unless the caller asked for another location
    if rescue_dir is None:
        rescue_dir = os.path.dirname(tar_files[0])

    # autodetect file type by extension
    first_tar_file = tar_files[0]
    mode = "r"
    decr = None
    separator = None
    if first_tar_file.endswith(".tar.gz"):
        mode = "r#gz"
        separator = tarfile.GZ_MAGIC_BYTES
    elif first_tar_file.endswith(".tar.gz.pdtcrypt"):
        if password is None:
            print("ERROR: tarball is encrypted but no password given",
                  file=sys.stderr)
            return -1
        mode = "r#gz"
        decr = crypto.Decrypt(password=password)
        separator = crypto.PDTCRYPT_HDR_MAGIC

    base_name = os.path.basename(first_tar_file)

    # plain tar volumes are extracted as they are; compressed or encrypted
    # ones are first divided into chunk files which are extracted instead
    extract_files = tar_files

    if mode == "r#gz":
        extract_files = []

        def new_gz(extract_files, prefix, i):
            '''
            Creates the next chunk file. Chunks are numbered consecutively
            across all input files, so the number of chunks created so far
            is used as suffix instead of the index handed in by the caller.
            '''
            # NOTE(review): with the default rescue_dir, "<prefix>.<n>" can
            # coincide with the name of a later input volume
            # (first.extension.0, ...) - confirm this is intended.
            path = "%s.%d" % (prefix, len(extract_files))
            extract_files.append(path)
            return open(path, 'wb')
        new_gz = partial(new_gz, extract_files)

        # split in compressed or encrypted chunks, respectively
        for f in tar_files:
            filesplit.split_file(separator,
                                 os.path.join(rescue_dir, base_name),
                                 f, new_gz)

    # includes volumes already extracted with new_volume_handler
    already_extracted_vols = []

    # NOTE(review): deltatar documents the handler as being invoked with
    # (tarobj, base_name, volume_number). The additional positional
    # "next_num" parameter that used to be declared here shifted those
    # arguments and was overwritten right away - confirm against
    # deltatar.tarfile.
    def new_volume_handler(already_extracted_vols, tarobj, base_name,
                           volume_number):
        '''
        Handles the new volumes when extracting
        '''
        # handle the special case where the first file is whatever.tar.gz and
        # the second is whatever.tar.gz.0
        base_name_split = base_name.split('.')
        next_num = 0
        try:
            next_num = int(base_name_split[-1]) + 1
            base_name = ".".join(base_name_split[:-1])
        except ValueError:
            # no numeric suffix: this is the first file, continue with ".0"
            pass

        volume_path = "%s.%d" % (base_name, next_num)
        # remember the volume so that the main loop does not extract it again
        already_extracted_vols.append(volume_path)
        tarobj.open_volume(volume_path, encryption=decr)
    new_volume_handler = partial(new_volume_handler, already_extracted_vols)

    # extract files, as much as possible
    errs = 0
    for f in extract_files:
        if f in already_extracted_vols:
            continue
        try:
            tarobj = tarfile.TarFile.open(
                f, mode=mode, encryption=decr,
                new_volume_handler=new_volume_handler)
            try:
                tarobj.extractall()
            finally:
                # release the archive even if the extraction failed
                tarobj.close()
        except Exception as exn:
            # best effort: report the failure and go on with the next file
            print("ERROR: error extracting file “%s” (%s)" % (f, exn),
                  file=sys.stderr)
            errs += 1

    if errs > 0:
        print("ERROR: encountered %d errors extracting %s"
              % (errs, first_tar_file), file=sys.stderr)
        return -1

    return 0


if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument("--rescue-dir", help="directory where rescue files "
                        "should be created. Defaults to the directory of "
                        "the first tar file")
    parser.add_argument("--password",
                        help="password; mandatory for encrypted tarballs")
    parser.add_argument("tar_files", nargs="+", help="list of files of a "
                        "multitar file to rescue. Assumes format "
                        "first.extension second.extension.0 "
                        "third.extension.1 ...")

    args = parser.parse_args()

    # the return value of rescue() becomes the exit status of the script
    sys.exit(rescue(tar_files=args.tar_files,
                    rescue_dir=args.rescue_dir,
                    password=args.password))