Commit | Line | Data |
---|---|---|
6b2fa38f | 1 | #!/usr/bin/env python3 |
0708a374 | 2 | |
51797cd6 | 3 | # Copyright (C) 2013, 2014 Intra2net AG |
0708a374 ERE |
4 | # |
5 | # This program is free software; you can redistribute it and/or modify | |
6 | # it under the terms of the GNU Lesser General Public License as published | |
7 | # by the Free Software Foundation; either version 3 of the License, or | |
8 | # (at your option) any later version. | |
9 | # | |
10 | # This program is distributed in the hope that it will be useful, | |
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | # GNU Lesser General Public License for more details. | |
14 | # | |
15 | # You should have received a copy of the GNU General Public License | |
16 | # along with this program. If not, see | |
17 | # <http://www.gnu.org/licenses/lgpl-3.0.html> | |
18 | ||
938c2d54 PG |
# Version of the PDTCRYPT object header format written when encrypting.
DELTATAR_HEADER_VERSION    = 1
# Version of the encryption parameter set requested for new backups
# (decryption derives its parameters from the headers instead).
DELTATAR_PARAMETER_VERSION = 1
3fdea6d4 | 21 | |
0708a374 ERE |
22 | import logging |
23 | import datetime | |
6c678f3a | 24 | import binascii |
938c2d54 | 25 | import io |
0501fe0a | 26 | import operator |
0708a374 | 27 | import os |
0501fe0a | 28 | import copy |
82de3376 | 29 | import shutil |
8a8fadda | 30 | import re |
e82f14f5 ERE |
31 | import stat |
32 | import json | |
c9ee0159 | 33 | import typing |
0708a374 ERE |
34 | from functools import partial |
35 | ||
36 | from . import tarfile | |
2ae46844 | 37 | from . import crypto |
0708a374 | 38 | |
0708a374 ERE |
class NullHandler(logging.Handler):
    """Logging handler that silently drops every record."""

    def emit(self, record):
        """Discard *record*; intentionally a no-op."""
        return
24ddf0a2 ERE |
42 | |
43 | ||
0708a374 ERE |
# Attach a do-nothing handler so that applications which never configure
# logging do not get "no handlers could be found" complaints from this
# library's logger.
logging.getLogger("deltatar.DeltaTar").addHandler(NullHandler())


# match mode: return values of DeltaTar.filter_path(). NO_MATCH/MATCH are
# deliberately False/True; PARENT_MATCH (2) is also truthy and marks a
# directory that is only relevant as an ancestor of an included path.
NO_MATCH = False
MATCH = True
PARENT_MATCH = 2

# encryption direction, passed to DeltaTar.initialize_encryption()
CRYPTO_MODE_ENCRYPT = 0
CRYPTO_MODE_DECRYPT = 1

# The canonical extension for encrypted backup files regardless of the actual
# encryption parameters is ".pdtcrypt". This is analogous to the encryption
# header which starts with the eight ASCII bytes "PDTCRYPT". Historical note:
# Since the introduction of the versioned header there is no longer any need
# for encoding encryption parameters in the file extensions (".aes128" and
# suchlike).
PDTCRYPT_EXTENSION = "pdtcrypt"
# kind of output file, used by DeltaTar.pick_extension() to decide whether
# a ".tar" component belongs in the name
PDT_TYPE_ARCHIVE = 0
PDT_TYPE_AUX = 1

# role of an auxiliary file; selects the IV counter when encrypting in
# DeltaTar.open_auxiliary_file()
AUXILIARY_FILE_INDEX = 0
AUXILIARY_FILE_INFO = 1
0708a374 ERE |
class DeltaTar(object):
    '''
    Backup class used to create backups

    The class-level attributes below are defaults; instances override them
    in __init__.
    '''

    # list of files to exclude in the backup creation or restore operation. It
    # can contain python regular expressions.
    excluded_files = []

    # list of files to include in the backup creation or restore operation. It
    # can contain python regular expressions. If empty, all files in the source
    # path will be backed up (when creating a backup) or all the files in the
    # backup will be restored (when restoring a backup), but if included_files
    # is set then only the files include in the list will be processed.
    included_files = []

    # custom filter of files to be backed up (or restored). Unused and unset
    # by default. The function receives a file path and must return a boolean.
    filter_func = None

    # mode in which the delta will be created (when creating a backup) or
    # opened (when restoring). Accepts modes analog to the tarfile library.
    mode = ""

    # used together with aes modes to encrypt and decrypt backups.
    password = None
    # raw encryption key (alternative to password); requires nacl when
    # encrypting
    crypto_key = None
    # salt embedded in the PDTCRYPT header; encryption-with-key only
    nacl = None

    # parameter version to use when encrypting; note that this has no effect
    # on decryption since the required settings are determined from the headers
    crypto_version = DELTATAR_HEADER_VERSION
    crypto_paramversion = None

    # when encrypting or decrypting, these hold crypto handlers; created before
    # establishing the Tarfile stream iff a password is supplied.
    encryptor = None
    decryptor = None

    # python logger object.
    logger = None

    # specifies the index mode in the same format as @param mode, but without
    # the ':', '|' or '#' at the begining. It doesn't make sense to specify
    # that the index is encrypted if no password is given in the constructor.
    index_mode = None

    # current time for this backup. Used for file names and file creation checks
    current_time = None

    # extra data to included in the header of the index file when creating a
    # backup
    extra_data = dict()

    # valid tarfile modes and their corresponding default file extension
    __file_extensions_dict = {
        '': '',
        ':': '',
        ':gz': '.gz',
        ':bz2': '.bz2',
        '|': '',
        '|gz': '.gz',
        '|bz2': '.bz2',
        '#gz': '.gz',
        '#gz.pdtcrypt': '.gz',
        '#pdtcrypt': '',
        '#': '',
    }

    # valid index modes and their corresponding default file extension
    __index_extensions_dict = {
        '': '',
        'gz': '.gz',
        'bz2': '.bz2',
        'gz.pdtcrypt': '.gz',
        'pdtcrypt': '',
    }

    # valid path prefixes used to tag entries in the index ("snapshot",
    # "list" and "delete" schemes)
    __path_prefix_list = [
        u'snapshot://',
        u'list://',
        u'delete://'
    ]
0708a374 | 154 | def __init__(self, excluded_files=[], included_files=[], |
da26094a | 155 | filter_func=None, mode="", password=None, |
1f3fd7b0 | 156 | crypto_key=None, nacl=None, |
54f909ca | 157 | crypto_version=DELTATAR_HEADER_VERSION, |
dbee011c | 158 | crypto_paramversion=DELTATAR_PARAMETER_VERSION, |
3a7e1a50 | 159 | logger=None, index_mode=None, index_name_func=None, |
0708a374 ERE |
160 | volume_name_func=None): |
161 | ''' | |
162 | Constructor. Configures the diff engine. | |
163 | ||
164 | Parameters: | |
165 | - excluded_files: list of files to exclude in the backup creation or | |
166 | restore operation. It can contain python regular expressions. | |
167 | ||
168 | - included_files: list of files to include in the backup creation or | |
169 | restore operation. It can contain python regular expressions. If | |
170 | empty, all files in the source path will be backed up (when creating a | |
171 | backup) or all the files in the backup will be restored (when | |
a83fa4ed | 172 | restoring a backup), but if included_files is set then only the files |
0708a374 ERE |
173 | include in the list will be processed. |
174 | ||
175 | - filter_func: custom filter of files to be backed up (or restored). | |
176 | Unused and unset by default. The function receives a file path and | |
177 | must return a boolean. | |
178 | ||
179 | - mode: mode in which the delta will be created (when creating a backup) | |
180 | or opened (when restoring). Accepts the same modes as the tarfile | |
181 | library. Valid modes are: | |
182 | ||
da26094a ERE |
183 | '' open uncompressed |
184 | ':' open uncompressed | |
185 | ':gz' open with gzip compression | |
186 | ':bz2' open with bzip2 compression | |
187 | '|' open an uncompressed stream of tar blocks | |
188 | '|gz' open a gzip compressed stream of tar blocks | |
189 | '|bz2' open a bzip2 compressed stream of tar blocks | |
190 | '#gz' open a stream of gzip compressed tar blocks | |
0708a374 | 191 | |
1f3fd7b0 PG |
192 | - crypto_key: used to encrypt and decrypt backups. Encryption will |
193 | be enabled automatically if a key is supplied. Requires a salt to be | |
194 | passed as well. | |
195 | ||
196 | - nacl: salt that was used to derive the encryption key for embedding | |
197 | in the PDTCRYPT header. Not needed when decrypting and when | |
198 | encrypting with password. | |
199 | ||
6e99d23a PG |
200 | - password: used to encrypt and decrypt backups. Encryption will be |
201 | enabled automatically if a password is supplied. | |
0708a374 | 202 | |
54f909ca PG |
203 | - crypto_version: version of the format, determining the kind of PDT |
204 | object header. | |
205 | ||
dbee011c PG |
206 | - crypto_paramversion: optionally request encryption conforming to |
207 | a specific parameter version. Defaults to the standard PDT value | |
208 | which as of 2017 is the only one available. | |
209 | ||
0708a374 ERE |
210 | - logger: python logger object. Optional. |
211 | ||
3a7e1a50 | 212 | - index_mode: specifies the index mode in the same format as @param |
6e99d23a PG |
213 | mode, but without the ':', '|' or '#' at the begining. If encryption |
214 | is requested it will extend to the auxiliary (index, info) files as | |
215 | well. This is an optional parameter that will automatically mimic | |
216 | @param mode by default if not provided. Valid modes are: | |
3a7e1a50 ERE |
217 | |
218 | '' open uncompressed | |
219 | 'gz' open with gzip compression | |
220 | 'bz2' open with bzip2 compression | |
0708a374 ERE |
221 | |
222 | - index_name_func: function that sets a custom name for the index file. | |
2cc6e32b PG |
223 | This function receives a flag to indicate whether the name will be |
224 | used for a full or diff backup. The backup path will be prepended to | |
225 | its return value. | |
0708a374 ERE |
226 | |
227 | - volume_name_func: function that defines the name of tar volumes. It | |
228 | receives the backup_path, if it's a full backup and the volume number, | |
229 | and must return the name for the corresponding volume name. Optional, | |
230 | DeltaTar has default names for tar volumes. | |
231 | ''' | |
232 | ||
da26094a | 233 | if mode not in self.__file_extensions_dict: |
8a54d5dd PG |
234 | raise Exception('Unrecognized extension mode=[%s] requested for files' |
235 | % str(mode)) | |
0708a374 ERE |
236 | |
237 | self.excluded_files = excluded_files | |
238 | self.included_files = included_files | |
239 | self.filter_func = filter_func | |
240 | self.logger = logging.getLogger('deltatar.DeltaTar') | |
241 | if logger: | |
242 | self.logger.addHandler(logger) | |
243 | self.mode = mode | |
2ae46844 | 244 | |
1f3fd7b0 PG |
245 | if crypto_key is not None: |
246 | self.crypto_key = crypto_key | |
247 | self.nacl = nacl # encryption only | |
248 | ||
2ae46844 PG |
249 | if password is not None: |
250 | self.password = password | |
3a7e1a50 | 251 | |
54f909ca PG |
252 | if crypto_version is not None: |
253 | self.crypto_version = crypto_version | |
254 | ||
dbee011c PG |
255 | if crypto_paramversion is not None: |
256 | self.crypto_paramversion = crypto_paramversion | |
257 | ||
3a7e1a50 ERE |
258 | # generate index_mode |
259 | if index_mode is None: | |
260 | index_mode = '' | |
6e99d23a | 261 | if 'gz' in mode: |
3a7e1a50 ERE |
262 | index_mode = "gz" |
263 | elif 'bz2' in mode: | |
264 | index_mode = "bz2" | |
265 | elif mode not in self.__index_extensions_dict: | |
8a54d5dd PG |
266 | raise Exception('Unrecognized extension mode=[%s] requested for index' |
267 | % str(mode)) | |
3a7e1a50 ERE |
268 | |
269 | self.index_mode = index_mode | |
0708a374 ERE |
270 | self.current_time = datetime.datetime.now() |
271 | ||
272 | if index_name_func is not None: | |
273 | self.index_name_func = index_name_func | |
274 | ||
275 | if volume_name_func is not None: | |
276 | self.volume_name_func = volume_name_func | |
277 | ||
e54cfec5 | 278 | def pick_extension(self, kind, mode=None): |
2cdd9faf PG |
279 | """ |
280 | Choose the extension depending on a) the kind of file given, b) the | |
281 | processing mode, and c) the current encryption settings. | |
282 | """ | |
283 | ret = "" | |
284 | if kind == PDT_TYPE_ARCHIVE: | |
285 | ret += ".tar" | |
e54cfec5 PG |
286 | if mode is None: |
287 | mode = self.__index_extensions_dict [self.index_mode] | |
2cdd9faf | 288 | ret += mode |
a83fa4ed | 289 | if self.crypto_key is not None or self.password is not None: |
2cdd9faf PG |
290 | ret += "." + PDTCRYPT_EXTENSION |
291 | return ret | |
292 | ||
f0287fb7 | 293 | def index_name_func(self, is_full): # pylint: disable=method-hidden |
0708a374 | 294 | ''' |
2cc6e32b PG |
295 | Callback for setting a custom name for the index file. Depending on |
296 | whether *is_full* is set, it will create a suitable name for a full | |
297 | or a diff backup. | |
0708a374 ERE |
298 | ''' |
299 | prefix = "bfull" if is_full else "bdiff" | |
f7940c31 | 300 | date_str = self.current_time.strftime("%Y-%m-%d-%H%M") |
2cdd9faf PG |
301 | extension = self.pick_extension \ |
302 | (PDT_TYPE_AUX, | |
303 | self.__index_extensions_dict [self.index_mode]) | |
0708a374 | 304 | |
da26094a | 305 | return "%s-%s.index%s" % (prefix, date_str, extension) |
0708a374 | 306 | |
f0287fb7 CH |
307 | def volume_name_func(self, backup_path, # pylint: disable=method-hidden |
308 | is_full, volume_number, | |
309 | guess_name=False): | |
0708a374 ERE |
310 | ''' |
311 | function that defines the name of tar volumes. It receives the | |
312 | backup_path, if it's a full backup and the volume number, and must return | |
313 | the name for the corresponding volume name. Optional, DeltaTar has default | |
314 | names for tar volumes. | |
df86af81 ERE |
315 | |
316 | If guess_name is activated, the file is intended not to be created but | |
317 | to be found, and thus the date will be guessed. | |
0708a374 ERE |
318 | ''' |
319 | prefix = "bfull" if is_full else "bdiff" | |
2cdd9faf PG |
320 | extension = self.pick_extension \ |
321 | (PDT_TYPE_ARCHIVE, | |
322 | self.__file_extensions_dict [self.mode]) | |
0708a374 | 323 | |
df86af81 | 324 | if not guess_name: |
f7940c31 | 325 | date_str = self.current_time.strftime("%Y-%m-%d-%H%M") |
2cdd9faf | 326 | return "%s-%s-%03d%s" % (prefix, date_str, volume_number + 1, extension) |
df86af81 ERE |
327 | else: |
328 | prefix = prefix + "-" | |
90b75470 | 329 | postfix = "-%03d%s" % (volume_number + 1, extension) |
86a6e741 ERE |
330 | for f in os.listdir(backup_path): |
331 | if f.startswith(prefix) and f.endswith(postfix): | |
332 | return f | |
df86af81 ERE |
333 | raise Exception("volume not found") |
334 | ||
0708a374 | 335 | |
    def filter_path(self, path, source_path="", is_dir=None):
        '''
        Filters a path, given the source_path, using the filtering properties
        set in the constructor.
        The filtering order is:
        1. included_files (if any)
        2. excluded_files
        3. filter_func (which must return whether the file is accepted or not)

        Returns NO_MATCH, MATCH, or PARENT_MATCH (directory only relevant as
        an ancestor of an included path).
        '''

        if len(source_path) > 0:
            # ensure that exactly one '/' at end of dir is also removed
            source_path = source_path.rstrip(os.sep) + os.sep
            # make *path* relative to the source directory
            path = path[len(source_path):]

        # 1. filter included_files; an empty inclusion list accepts everything
        match = MATCH
        if len(self.included_files) > 0:
            match = NO_MATCH
            for i in self.included_files:
                # it can be either a regexp or a string
                if isinstance(i, str):
                    # if the string matches, then continue
                    if i == path:
                        match = MATCH
                        break

                    # if the string ends with / it's a directory, and if the
                    # path is contained in it, it is included
                    if i.endswith('/') and path.startswith(i):
                        match = MATCH
                        break

                    # if the string doesn't end with /, add it and do the same
                    # check
                    elif path.startswith(i + '/'):
                        match = MATCH
                        break

                    # check for PARENT_MATCH: *path* is a directory that is an
                    # ancestor of an included pattern. Note: no break here, so
                    # a later pattern may still upgrade this to a full MATCH.
                    if is_dir:
                        dir_path = path
                        if not dir_path.endswith('/'):
                            dir_path += '/'

                        if i.startswith(dir_path):
                            match = PARENT_MATCH

                # if it's a reg exp, then we just check if it matches
                elif isinstance(i, typing.Pattern):
                    if i.match(path):
                        match = MATCH
                        break
                else:
                    self.logger.warning('Invalid pattern in included_files: %s' % str(i))

        if match == NO_MATCH:
            return NO_MATCH

        # when a directory is in PARENT_MATCH, it doesn't matter if it's
        # excluded. It's subfiles will be excluded, but the directory itself
        # won't
        if match != PARENT_MATCH:
            # 2. filter excluded_files with the same string/regexp rules
            for e in self.excluded_files:
                # it can be either a regexp or a string
                if isinstance(e, str):
                    # if the string matches, then exclude
                    if e == path:
                        return NO_MATCH

                    # if the string ends with / it's a directory, and if the
                    # path starts with the directory, then exclude
                    if e.endswith('/') and path.startswith(e):
                        return NO_MATCH

                    # if the string doesn't end with /, do the same check with
                    # the slash added
                    elif path.startswith(e + '/'):
                        return NO_MATCH

                # if it's a reg exp, then we just check if it matches
                elif isinstance(e, typing.Pattern):
                    if e.match(path):
                        return NO_MATCH
                else:
                    self.logger.warning('Invalid pattern in excluded_files: %s' % str(e))

        # 3. the custom filter has the final word for surviving paths
        if self.filter_func:
            return self.filter_func(path)

        return match
8a8fadda | 427 | |
    def _recursive_walk_dir(self, source_path, keep_base_dir=False):
        '''
        Walk a directory recursively, yielding each file/directory
        (breadth-first, children sorted by name), subject to filter_path().

        Returns the path of an entity. If ``keep_base_dir`` is set,
        the path returned contains the prefix ``source_path``; otherwise it is
        relative to the prefix.
        '''

        source_path = source_path.rstrip(os.sep)

        if keep_base_dir:
            beginning_size = 0
        else:
            beginning_size = len(source_path) + 1 # +1 for os.sep

        # FIFO queue of directories still to visit (breadth-first order)
        queue = [source_path]

        while queue:
            cur_path = queue.pop(0)

            # hold a directory fd so the listing is race-free against renames
            try:
                dfd = os.open (cur_path, os.O_DIRECTORY)
            except FileNotFoundError as exn:
                # directory vanished between being queued and being opened
                self.logger.warning ("failed to open entity [%s] as directory; "
                                     "file system (error: %s); skipping"
                                     % (cur_path, str (exn)))
                continue

            try:
                for filename in sorted(os.listdir(dfd)):
                    child = os.path.join(cur_path, filename)
                    is_dir = os.path.isdir(child)
                    status = self.filter_path(child, source_path, is_dir)
                    if status == NO_MATCH:
                        continue
                    if not os.access(child, os.R_OK):
                        self.logger.warning('Error accessing possibly locked file %s' % child)
                        continue

                    if status == MATCH:
                        yield child[beginning_size:]

                    # PARENT_MATCH directories are descended into but not
                    # yielded themselves
                    if is_dir and (status == MATCH or status == PARENT_MATCH):
                        queue.append(child)
            finally:
                # close the fd even if the consumer abandons the generator
                os.close (dfd)
0708a374 | 475 | |
e82f14f5 ERE |
476 | def _stat_dict(self, path): |
477 | ''' | |
478 | Returns a dict with the stat data used to compare files | |
479 | ''' | |
480 | stinfo = os.stat(path) | |
481 | mode = stinfo.st_mode | |
482 | ||
483 | ptype = None | |
484 | if stat.S_ISDIR(mode): | |
d07c8065 | 485 | ptype = u'directory' |
e82f14f5 | 486 | elif stat.S_ISREG(mode): |
d07c8065 | 487 | ptype = u'file' |
e82f14f5 | 488 | elif stat.S_ISLNK(mode): |
d07c8065 | 489 | ptype = u'link' |
e82f14f5 ERE |
490 | |
491 | return { | |
d07c8065 | 492 | u'type': ptype, |
be60ffd0 | 493 | u'path': path, |
d07c8065 | 494 | u'mode': mode, |
0501fe0a ERE |
495 | u'mtime': int(stinfo.st_mtime), |
496 | u'ctime': int(stinfo.st_ctime), | |
d07c8065 ERE |
497 | u'uid': stinfo.st_uid, |
498 | u'gid': stinfo.st_gid, | |
499 | u'inode': stinfo.st_ino, | |
500 | u'size': stinfo.st_size | |
e82f14f5 ERE |
501 | } |
502 | ||
df99a044 | 503 | def _equal_stat_dicts(self, d1, d2, listsnapshot_equal=False): |
d07c8065 ERE |
504 | ''' |
505 | Return if the dicts are equal in the stat keys | |
506 | ''' | |
fc8fdcbc | 507 | keys = [u'type', u'mode',u'size', u'mtime', |
d041935c | 508 | # not restored: u'inode', u'ctime' |
df99a044 | 509 | ] |
8adbe50d | 510 | |
fc8fdcbc | 511 | # only if user is root, then also check gid/uid. otherwise do not check it, |
d041935c | 512 | # because tarfile can chown in case of being superuser only |
50d70ca9 PG |
513 | # |
514 | # also, skip the check in rpmbuild since the sources end up with the | |
515 | # uid:gid of the packager while the extracted files are 0:0. | |
516 | if hasattr(os, "geteuid") and os.geteuid() == 0 \ | |
517 | and os.getenv ("RPMBUILD_OPTIONS") is None: | |
fc8fdcbc ERE |
518 | keys.append('gid') |
519 | keys.append('uid') | |
520 | ||
ea6d3c3e | 521 | if (not d1 and d2 != None) or (d1 != None and not d2): |
8adbe50d ERE |
522 | return False |
523 | ||
cbac9f0b ERE |
524 | if self.prefixed(d1.get('path', -1), listsnapshot_equal) != self.prefixed(d2.get('path', -2), listsnapshot_equal): |
525 | return False | |
8adbe50d | 526 | |
fc8fdcbc ERE |
527 | type = d1.get('type', '') |
528 | ||
d07c8065 | 529 | for key in keys: |
fc8fdcbc ERE |
530 | # size doesn't matter for directories |
531 | if type == 'directory' and key == 'size': | |
532 | continue | |
d07c8065 ERE |
533 | if d1.get(key, -1) != d2.get(key, -2): |
534 | return False | |
535 | return True | |
536 | ||
df99a044 | 537 | def prefixed(self, path, listsnapshot_equal=False): |
8adbe50d ERE |
538 | ''' |
539 | if a path is not prefixed, return it prefixed | |
540 | ''' | |
541 | for prefix in self.__path_prefix_list: | |
542 | if path.startswith(prefix): | |
df99a044 ERE |
543 | if listsnapshot_equal and prefix == u'list://': |
544 | return u'snapshot://' + path[len(prefix):] | |
8adbe50d ERE |
545 | return path |
546 | return u'snapshot://' + path | |
547 | ||
548 | def unprefixed(self, path): | |
549 | ''' | |
550 | remove a path prefix if any | |
551 | ''' | |
552 | for prefix in self.__path_prefix_list: | |
553 | if path.startswith(prefix): | |
554 | return path[len(prefix):] | |
555 | return path | |
556 | ||
133d30da | 557 | |
b750b280 PG |
558 | def initialize_encryption (self, mode, strict_validation=True): |
559 | """ | |
560 | :type strict_validation: bool | |
561 | :param strict_validation: Enable strict IV checking in the crypto | |
562 | layer. Should be disabled when dealing with | |
563 | potentially corrupted data. | |
564 | """ | |
133d30da | 565 | password = self.password |
1f3fd7b0 PG |
566 | key = self.crypto_key |
567 | nacl = self.nacl | |
133d30da | 568 | |
1f3fd7b0 | 569 | if key is None and password is None: |
133d30da PG |
570 | return |
571 | if mode == CRYPTO_MODE_ENCRYPT: | |
1f3fd7b0 PG |
572 | return crypto.Encrypt (password=password, |
573 | key=key, | |
574 | nacl=nacl, | |
54f909ca | 575 | version=self.crypto_version, |
774ca538 | 576 | paramversion=self.crypto_paramversion) |
133d30da | 577 | if mode == CRYPTO_MODE_DECRYPT: |
b750b280 PG |
578 | return crypto.Decrypt (password=password, key=key, |
579 | strict_ivs=strict_validation) | |
133d30da PG |
580 | |
581 | raise Exception ("invalid encryption mode [%r]" % mode) | |
582 | ||
583 | ||
b750b280 PG |
584 | def open_auxiliary_file(self, path, mode='r', kind=AUXILIARY_FILE_INDEX, |
585 | strict_validation=True): | |
3a7e1a50 | 586 | ''' |
9eccb1c2 PG |
587 | Given the specified configuration, opens a file for reading or writing, |
588 | inheriting the encryption and compression settings from the backup. | |
589 | Returns a file object ready to use. | |
3fdea6d4 | 590 | |
c8c72fe1 PG |
591 | :param mode: IO mode (read or write, ``"r"`` and ``"w"``, |
592 | respectively). | |
593 | :type mode: str | |
774ca538 PG |
594 | :param kind: Role of the file, see AUXILIARY_FILE_* constants. |
595 | Both the info and the auxiliary file have a globally | |
596 | unique, constant counter value. | |
3fdea6d4 | 597 | :type kind: str |
3a7e1a50 | 598 | ''' |
3a7e1a50 ERE |
599 | if self.index_mode.startswith('gz'): |
600 | comptype = 'gz' | |
601 | elif self.index_mode.startswith('bz2'): | |
602 | comptype = 'bz2' | |
603 | else: | |
604 | comptype = 'tar' | |
605 | ||
133d30da | 606 | crypto_ctx = None |
6de9444a | 607 | enccounter = None |
133d30da | 608 | if mode == "w": |
774ca538 | 609 | crypto_ctx = self.initialize_encryption (CRYPTO_MODE_ENCRYPT) |
133d30da | 610 | elif mode == "r": |
b750b280 PG |
611 | crypto_ctx = self.initialize_encryption (CRYPTO_MODE_DECRYPT, |
612 | strict_validation=strict_validation) | |
133d30da | 613 | |
3031b7ae PG |
614 | if crypto_ctx is not None: |
615 | if kind == AUXILIARY_FILE_INFO: | |
616 | enccounter = crypto.AES_GCM_IV_CNT_INFOFILE | |
617 | elif kind == AUXILIARY_FILE_INDEX: | |
618 | enccounter = crypto.AES_GCM_IV_CNT_INDEX | |
619 | else: | |
620 | raise Exception ("invalid kind of aux file %r" % kind) | |
621 | ||
c8c72fe1 | 622 | sink = tarfile._Stream(name=path, mode=mode, comptype=comptype, |
3fdea6d4 | 623 | bufsize=tarfile.RECORDSIZE, fileobj=None, |
6de9444a | 624 | encryption=crypto_ctx, enccounter=enccounter) |
c8c72fe1 PG |
625 | |
626 | return sink | |
627 | ||
3a7e1a50 | 628 | |
0708a374 | 629 | def create_full_backup(self, source_path, backup_path, |
d4a05db6 | 630 | max_volume_size=None, extra_data=dict()): |
0708a374 ERE |
631 | ''' |
632 | Creates a full backup. | |
633 | ||
634 | Parameters: | |
635 | - source_path: source path to the directory to back up. | |
636 | - backup_path: path where the back up will be stored. Backup path will | |
637 | be created if not existent. | |
d5361dac ERE |
638 | - max_volume_size: maximum volume size in megabytes. Used to split the |
639 | backup in volumes. Optional (won't split in volumes by default). | |
9eae9a1f ERE |
640 | - extra_data: a json-serializable dictionary with information that you |
641 | want to be included in the header of the index file | |
0708a374 ERE |
642 | ''' |
643 | # check input | |
be60ffd0 | 644 | if not isinstance(source_path, str): |
0708a374 ERE |
645 | raise Exception('Source path must be a string') |
646 | ||
be60ffd0 | 647 | if not isinstance(backup_path, str): |
0708a374 ERE |
648 | raise Exception('Backup path must be a string') |
649 | ||
650 | if not os.path.exists(source_path) or not os.path.isdir(source_path): | |
651 | raise Exception('Source path "%s" does not exist or is not a '\ | |
652 | 'directory' % source_path) | |
653 | ||
d07c8065 ERE |
654 | if max_volume_size != None and (not isinstance(max_volume_size, int) or\ |
655 | max_volume_size < 1): | |
656 | raise Exception('max_volume_size must be a positive integer') | |
d5361dac ERE |
657 | if max_volume_size != None: |
658 | max_volume_size = max_volume_size*1024*1024 | |
659 | ||
9eae9a1f ERE |
660 | if not isinstance(extra_data, dict): |
661 | raise Exception('extra_data must be a dictionary') | |
662 | ||
663 | try: | |
664 | extra_data_str = json.dumps(extra_data) | |
665 | except: | |
666 | raise Exception('extra_data is not json-serializable') | |
667 | ||
0708a374 ERE |
668 | if not os.access(source_path, os.R_OK): |
669 | raise Exception('Source path "%s" is not readable' % source_path) | |
670 | ||
671 | # try to create backup path if needed | |
37ab0f57 | 672 | os.makedirs(backup_path, exist_ok=True) |
0708a374 ERE |
673 | |
674 | if not os.access(backup_path, os.W_OK): | |
675 | raise Exception('Backup path "%s" is not writeable' % backup_path) | |
676 | ||
677 | if source_path.endswith('/'): | |
678 | source_path = source_path[:-1] | |
679 | ||
680 | if backup_path.endswith('/'): | |
681 | backup_path = backup_path[:-1] | |
682 | ||
683 | # update current time | |
684 | self.current_time = datetime.datetime.now() | |
685 | ||
686 | if self.mode not in self.__file_extensions_dict: | |
687 | raise Exception('Unrecognized extension') | |
688 | ||
2ae46844 | 689 | # setup for encrypting payload |
774ca538 PG |
690 | if self.encryptor is None: |
691 | self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT) | |
2ae46844 | 692 | |
0708a374 | 693 | # some initialization |
11684b1d | 694 | self.vol_no = 0 |
0708a374 ERE |
695 | |
696 | # generate the first volume name | |
697 | vol_name = self.volume_name_func(backup_path, True, 0) | |
698 | tarfile_path = os.path.join(backup_path, vol_name) | |
699 | ||
774ca538 PG |
700 | # init index |
701 | index_name = self.index_name_func(True) | |
702 | index_path = os.path.join(backup_path, index_name) | |
703 | index_sink = self.open_auxiliary_file(index_path, 'w') | |
e82f14f5 | 704 | |
d5361dac ERE |
705 | cwd = os.getcwd() |
706 | ||
b7c47f38 | 707 | def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number): |
0708a374 ERE |
708 | ''' |
709 | Handles the new volumes | |
710 | ''' | |
d5361dac ERE |
711 | volume_name = deltarobj.volume_name_func(backup_path, True, volume_number) |
712 | volume_path = os.path.join(backup_path, volume_name) | |
11684b1d | 713 | deltarobj.vol_no = volume_number |
d5361dac ERE |
714 | |
715 | # we convert relative paths into absolute because CWD is changed | |
716 | if not os.path.isabs(volume_path): | |
717 | volume_path = os.path.join(cwd, volume_path) | |
11684b1d | 718 | |
8e019196 ERE |
719 | if tarobj.fileobj is not None: |
720 | tarobj.fileobj.close() | |
721 | ||
b008f989 ERE |
722 | deltarobj.logger.debug("opening volume %s" % volume_path) |
723 | ||
b7c47f38 | 724 | tarobj.open_volume(volume_path, encryption=encryption) |
d5361dac ERE |
725 | |
726 | # wraps some args from context into the handler | |
133d30da | 727 | new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.encryptor) |
0708a374 | 728 | |
774ca538 | 729 | index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "full", "extra_data": %s}\n' % extra_data_str, 'UTF-8')) |
6c678f3a | 730 | |
be60ffd0 | 731 | s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8') |
6c678f3a | 732 | # calculate checksum and write into the stream |
c2ffe2ec | 733 | crc = binascii.crc32(s) & 0xFFFFffff |
774ca538 | 734 | index_sink.write(s) |
e82f14f5 | 735 | |
0708a374 ERE |
736 | # start creating the tarfile |
737 | tarobj = tarfile.TarFile.open(tarfile_path, | |
da26094a | 738 | mode='w' + self.mode, |
0708a374 | 739 | format=tarfile.GNU_FORMAT, |
d1c38f40 | 740 | concat='#' in self.mode, |
133d30da | 741 | encryption=self.encryptor, |
0708a374 | 742 | max_volume_size=max_volume_size, |
ea625b04 | 743 | new_volume_handler=new_volume_handler, |
e2b59b34 ERE |
744 | save_to_members=False, |
745 | dereference=True) | |
e5c6ca04 | 746 | os.chdir(source_path) |
55b8686d ERE |
747 | |
748 | # for each file to be in the backup, do: | |
e82f14f5 | 749 | for path in self._recursive_walk_dir('.'): |
3e9b81bb PG |
750 | |
751 | try: # backup file | |
fd743c26 PG |
752 | # calculate stat dict for current file |
753 | statd = self._stat_dict(path) | |
754 | statd['path'] = u'snapshot://' + statd['path'] | |
755 | statd['volume'] = self.vol_no | |
756 | ||
757 | # backup file | |
3e9b81bb PG |
758 | tarobj.add(path, arcname = statd['path'], recursive=False) |
759 | except FileNotFoundError as exn: | |
760 | # file vanished since the call to access(3) above | |
761 | self.logger.warning ("object [%s] no longer available in " | |
762 | "file system (error: %s); skipping" | |
763 | % (path, str (exn))) | |
764 | continue # prevent indexing | |
11684b1d | 765 | |
55b8686d | 766 | # retrieve file offset |
253d4cdd | 767 | statd['offset'] = tarobj.get_last_member_offset() |
b008f989 | 768 | self.logger.debug("backup %s" % statd['path']) |
6c678f3a | 769 | |
d041935c | 770 | # store the stat dict in the index |
be60ffd0 | 771 | s = bytes(json.dumps(statd) + '\n', 'UTF-8') |
6c678f3a | 772 | crc = binascii.crc32(s, crc) & 0xffffffff |
774ca538 | 773 | index_sink.write(s) |
e82f14f5 | 774 | |
be60ffd0 | 775 | s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8') |
6c678f3a | 776 | crc = binascii.crc32(s, crc) & 0xffffffff |
774ca538 | 777 | index_sink.write(s) |
be60ffd0 | 778 | s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8') |
774ca538 PG |
779 | index_sink.write(s) |
780 | ||
e5c6ca04 | 781 | os.chdir(cwd) |
0708a374 | 782 | tarobj.close() |
c8c72fe1 | 783 | index_sink.close (close_fileobj=True) |
938c2d54 | 784 | |
def create_diff_backup(self, source_path, backup_path, previous_index_path,
                       max_volume_size=None, extra_data=None):
    '''
    Creates a differential backup against a previous backup's index.

    Parameters:
    - source_path: source path to the directory to back up.
    - backup_path: path where the back up will be stored. Backup path will
      be created if not existent.
    - previous_index_path: index of the previous backup, needed to know
      which files changed since then.
    - max_volume_size: maximum volume size in megabytes (MB). Used to split
      the backup in volumes. Optional (won't split in volumes by default).
    - extra_data: optional dict of json-serializable data stored in the
      index header.

    Raises Exception on invalid arguments or on unreadable/unwritable
    paths.

    NOTE: previous index is assumed to follow exactly the same format as
    the index_mode setup in the constructor.
    '''
    # avoid the mutable-default-argument pitfall; None behaves like the
    # old default of an empty dict
    if extra_data is None:
        extra_data = {}

    # check/sanitize input
    if not isinstance(source_path, str):
        raise Exception('Source path must be a string')

    if not isinstance(backup_path, str):
        raise Exception('Backup path must be a string')

    if not os.path.exists(source_path) or not os.path.isdir(source_path):
        raise Exception('Source path "%s" does not exist or is not a '
                        'directory' % source_path)

    if not isinstance(extra_data, dict):
        raise Exception('extra_data must be a dictionary')

    try:
        extra_data_str = json.dumps(extra_data)
    except (TypeError, ValueError):
        # narrowed from a bare except: these are the failure modes of
        # json.dumps (unserializable object / circular reference)
        raise Exception('extra_data is not json-serializable')

    if not os.access(source_path, os.R_OK):
        raise Exception('Source path "%s" is not readable' % source_path)

    if max_volume_size is not None and (not isinstance(max_volume_size, int)
                                        or max_volume_size < 1):
        raise Exception('max_volume_size must be a positive integer')
    if max_volume_size is not None:
        # convert megabytes to bytes for the tar writer
        max_volume_size = max_volume_size * 1024 * 1024

    if not isinstance(previous_index_path, str):
        raise Exception('previous_index_path must be a string')

    if not os.path.exists(previous_index_path) or not os.path.isfile(previous_index_path):
        raise Exception('Index path "%s" does not exist or is not a '
                        'file' % previous_index_path)

    if not os.access(previous_index_path, os.R_OK):
        raise Exception('Index path "%s" is not readable' % previous_index_path)

    # try to create backup path if needed
    os.makedirs(backup_path, exist_ok=True)

    if not os.access(backup_path, os.W_OK):
        raise Exception('Backup path "%s" is not writeable' % backup_path)

    # normalize trailing slashes
    if source_path.endswith('/'):
        source_path = source_path[:-1]
    if backup_path.endswith('/'):
        backup_path = backup_path[:-1]

    # update current time
    self.current_time = datetime.datetime.now()

    if self.mode not in self.__file_extensions_dict:
        raise Exception('Unrecognized extension')

    # setup for encrypting payload
    if self.encryptor is None:
        self.encryptor = self.initialize_encryption (CRYPTO_MODE_ENCRYPT)

    # some initialization
    self.vol_no = 0

    # generate the first volume name
    vol_name = self.volume_name_func(backup_path, is_full=False,
                                     volume_number=0)
    tarfile_path = os.path.join(backup_path, vol_name)

    # init index
    cwd = os.getcwd()

    index_name = self.index_name_func(is_full=False)
    index_path = os.path.join(backup_path, index_name)
    index_sink = self.open_auxiliary_file(index_path, 'w')

    def new_volume_handler(deltarobj, cwd, backup_path, encryption,
                           tarobj, base_name, volume_number):
        '''
        Handles the new volumes
        '''
        volume_name = deltarobj.volume_name_func(backup_path, is_full=False,
                                                 volume_number=volume_number)
        volume_path = os.path.join(backup_path, volume_name)
        deltarobj.vol_no = volume_number

        # we convert relative paths into absolute because CWD is changed
        if not os.path.isabs(volume_path):
            volume_path = os.path.join(cwd, volume_path)

        deltarobj.logger.debug("opening volume %s" % volume_path)
        # pass the encryption context explicitly, exactly like the
        # handlers used for full backups and for listing do; previously
        # it was omitted here so follow-up volumes of a diff backup were
        # opened without it
        tarobj.open_volume(volume_path, encryption=encryption)

    # wraps some args from context into the handler
    new_volume_handler = partial(new_volume_handler, self, cwd, backup_path,
                                 self.encryptor)

    # index header; note that only the lines between BEGIN-FILE-LIST and
    # the final checksum line participate in the CRC below
    index_sink.write(bytes('{"type": "python-delta-tar-index", "version": 1, "backup-type": "diff", "extra_data": %s}\n' % extra_data_str, 'UTF-8'))

    s = bytes('{"type": "BEGIN-FILE-LIST"}\n', 'UTF-8')
    # calculate checksum and write into the stream
    crc = binascii.crc32(s) & 0xFFFFffff
    index_sink.write(s)

    # start creating the tarfile
    tarobj = tarfile.TarFile.open(tarfile_path,
                                  mode='w' + self.mode,
                                  format=tarfile.GNU_FORMAT,
                                  concat='#' in self.mode,
                                  encryption=self.encryptor,
                                  max_volume_size=max_volume_size,
                                  new_volume_handler=new_volume_handler,
                                  save_to_members=False,
                                  dereference=True)

    # create the iterators, first the previous index iterator, then the
    # source path directory iterator and collate and iterate them
    if not os.path.isabs(previous_index_path):
        previous_index_path = os.path.join(cwd, previous_index_path)
    index_it = self.iterate_index_path(previous_index_path)

    os.chdir(source_path)
    dir_it = self._recursive_walk_dir('.')
    dir_path_it = self.jsonize_path_iterator(dir_it)

    # for each file to be in the backup, do:
    for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it):
        action = None
        # if file is not in the index, it means it's a new file, so we have
        # to take a snapshot
        if not ipath:
            action = 'snapshot'
        # if the file is not in the directory iterator, it means that it
        # has been deleted, so we need to mark it as such
        elif not dpath:
            action = 'delete'
        # if the file is in both iterators, it means it might have either
        # not changed (in which case we will just list it in our index but
        # it will not be included in the tar file), or it might have
        # changed, in which case we will snapshot it.
        elif ipath and dpath:
            if self._equal_stat_dicts(ipath, dpath):
                action = 'list'
            else:
                action = 'snapshot'
        # TODO: when creating chained backups (i.e. diffing from another
        # diff), we will need to detect the type of action in the previous
        # index, because if it was delete and dpath is None, we should
        # discard the file

        if action == 'snapshot':
            # calculate stat dict for current file
            stat = dpath.copy()
            stat['path'] = "snapshot://" + dpath['path']
            stat['volume'] = self.vol_no

            self.logger.debug("[STORE] %s" % dpath['path'])

            try: # backup file
                tarobj.add(dpath['path'], arcname=stat['path'], recursive=False)
                # retrieve file offset
                stat['offset'] = tarobj.get_last_member_offset()
            except FileNotFoundError as exn:
                # file vanished since the call to access(3) above
                self.logger.warning ("object [%s] no longer available in "
                                     "file system (error: %s); skipping"
                                     % (dpath ["path"], str (exn)))
                stat = None # prevent indexing

        elif action == 'delete':
            path = self.unprefixed(ipath['path'])
            stat = {
                u'path': u'delete://' + path,
                u'type': ipath['type']
            }
            self.logger.debug("[DELETE] %s" % path)

            # mark it as deleted in the backup
            tarobj.add("/dev/null", arcname=stat['path'])
        elif action == 'list':
            stat = dpath.copy()
            path = self.unprefixed(ipath['path'])
            stat['path'] = u'list://' + path
            # unchanged files do not enter in the backup, only in the index
            self.logger.debug("[UNCHANGED] %s" % path)
        else:
            # should not happen
            self.logger.warning('unknown action in create_diff_backup: {0}'
                                ''.format(action))
            stat = None

        if stat:
            # store the stat dict in the index
            s = bytes(json.dumps(stat) + '\n', 'UTF-8')
            crc = binascii.crc32(s, crc) & 0xffffffff
            index_sink.write(s)

    s = bytes('{"type": "END-FILE-LIST"}\n', 'UTF-8')
    crc = binascii.crc32(s, crc) & 0xffffffff
    index_sink.write(s)
    s = bytes('{"type": "file-list-checksum", "checksum": %d}\n' % crc, 'UTF-8')
    index_sink.write(s)

    index_it.release()
    os.chdir(cwd)
    tarobj.close()
    index_sink.close()
1014 | ||
1015 | ||
def iterate_index_path(self, index_path, strict_validation=True):
    '''
    Returns an index iterator. Internally, it uses a classic iterator class.
    We do that instead of just yielding so that the iterator object can have
    an additional function to close the file descriptor that is opened in
    the constructor.

    Parameters:
    - index_path: path of the index file to iterate.
    - strict_validation: forwarded to open_auxiliary_file; presumably
      controls how strictly the encrypted container is checked -- TODO
      confirm against open_auxiliary_file.

    Each iteration yields a (json-dict, line-number) tuple for entries of
    type 'directory', 'file' or 'link'; other entry types are logged and
    skipped.
    '''

    class IndexPathIterator(object):
        def __init__(self, delta_tar, index_path):
            self.delta_tar = delta_tar
            self.index_path = index_path
            # file object; opened lazily by __enter__ (called right away)
            self.f = None
            # header 'extra_data' payload, filled in by __enter__
            self.extra_data = dict()
            # open the file and consume headers immediately so that the
            # object is usable without an explicit "with" statement
            self.__enter__()

        def __iter__(self):
            return self

        def release(self):
            # explicit close for callers not using "with"
            if self.f:
                self.f.close()

        def __enter__(self):
            '''
            Allows this iterator to be used with the "with" statement
            '''
            if self.f is None:
                self.f = self.delta_tar.open_auxiliary_file \
                    (self.index_path,
                     'r',
                     strict_validation=strict_validation)
            # check index header
            j, l_no = self.delta_tar._parse_json_line(self.f, 0)
            if j.get("type", '') != 'python-delta-tar-index' or\
               j.get('version', -1) != 1:
                raise Exception("invalid index file format: %s" % json.dumps(j))

            self.extra_data = j.get('extra_data', dict())

            # find BEGIN-FILE-LIST, ignore other headers
            while True:
                j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
                if j.get('type', '') == 'BEGIN-FILE-LIST':
                    break
            return self

        def __exit__(self, type, value, tb):
            '''
            Allows this iterator to be used with the "with" statement
            '''
            if self.f:
                self.f.close()
            self.f = None

        def __next__(self):
            # read each file in the index and process it to do the restore
            j = {}
            l_no = -1
            try:
                j, l_no = self.delta_tar._parse_json_line(self.f, l_no)
            except Exception as e:
                # close the underlying file before propagating so we do
                # not leak the descriptor on malformed input
                if self.f:
                    self.f.close()
                raise e

            op_type = j.get('type', '')

            # when we detect the end of the list, break the loop
            if op_type == 'END-FILE-LIST':
                if self.f:
                    self.f.close()
                raise StopIteration

            # check input
            if op_type not in ['directory', 'file', 'link']:
                self.delta_tar.logger.warning('unrecognized type to be '
                                    'restored: %s, line %d' % (op_type, l_no))
                # iterate again
                return self.__next__()

            return j, l_no

    return IndexPathIterator(self, index_path)
d07c8065 | 1100 | |
def iterate_tar_path(self, tar_path, new_volume_handler=None):
    '''
    Returns a tar iterator that iterates jsonized member items that contain
    an additional "member" field, used by RestoreHelper.

    Parameters:
    - tar_path: path to the (first volume of the) backup tar.
    - new_volume_handler: optional callback forwarded to TarFile.open for
      switching to follow-up volumes.

    Each iteration yields a (stat-dict, 0) tuple; the constant 0 stands in
    for the line number that index-based iterators provide.
    '''
    class TarPathIterator(object):
        def __init__(self, delta_tar, tar_path, new_volume_handler=None):
            self.delta_tar = delta_tar
            self.tar_path = tar_path
            # tar object; opened lazily by __enter__ (called right away)
            self.tar_obj = None
            # last tarinfo seen, used to detect wrap-around (see __next__)
            self.last_member = None
            self.new_volume_handler = new_volume_handler
            # open immediately so the object works without "with"
            self.__enter__()

        def __iter__(self):
            return self

        def release(self):
            # explicit close for callers not using "with"
            if self.tar_obj:
                self.tar_obj.close()

        def __enter__(self):
            '''
            Allows this iterator to be used with the "with" statement
            '''
            if self.tar_obj is None:
                # build a decryptor only when a password is configured;
                # strict_ivs is disabled for reading legacy archives --
                # TODO confirm against crypto.Decrypt
                decryptor = None
                if self.delta_tar.password is not None:
                    decryptor = crypto.Decrypt \
                        (password=self.delta_tar.password,
                         key=self.delta_tar.crypto_key,
                         strict_ivs=False)
                self.tar_obj = tarfile.TarFile.open(self.tar_path,
                                    mode='r' + self.delta_tar.mode,
                                    format=tarfile.GNU_FORMAT,
                                    concat='#' in self.delta_tar.mode,
                                    encryption=decryptor,
                                    new_volume_handler=self.new_volume_handler,
                                    save_to_members=False,
                                    dereference=True)
            return self

        def __exit__(self, type, value, tb):
            '''
            Allows this iterator to be used with the "with" statement
            '''
            if self.tar_obj:
                self.tar_obj.close()
                self.tar_obj = None

        def __next__(self):
            '''
            Read each member and return it as a stat dict
            '''
            tarinfo = self.tar_obj.__iter__().__next__()
            # NOTE: here we compare if tarinfo.path is the same as before
            # instead of comparing the tarinfo object itself because the
            # object itself might change for multivol tarinfos
            if tarinfo is None or (self.last_member is not None and\
                self.delta_tar.unprefixed(tarinfo.path) == self.delta_tar.unprefixed(self.last_member.path)):
                raise StopIteration

            self.last_member = tarinfo

            # map the tarinfo flavor onto the index entry 'type' field
            ptype = 'unknown'
            if tarinfo.isfile():
                ptype = 'file'
            elif tarinfo.isdir():
                ptype = 'directory'
            elif tarinfo.islnk() or tarinfo.issym():
                ptype = 'link'

            return {
                u'type': ptype,
                u'path': tarinfo.path,
                u'mode': tarinfo.mode,
                u'mtime': tarinfo.mtime,
                u'ctime': -1, # cannot restore
                u'uid': tarinfo.uid,
                u'gid': tarinfo.gid,
                u'inode': -1, # cannot restore
                u'size': tarinfo.size,
                u'member': tarinfo
            }, 0

    return TarPathIterator(self, tar_path, new_volume_handler)
24ddf0a2 | 1187 | |
def jsonize_path_iterator(self, iter, strip=0):
    '''
    Convert the paths yielded by *iter* into (stat-dict, 0) tuples.

    strip: Strip the smallest prefix containing num leading slashes from
    the file path.
    '''
    for path in iter:
        entry = self._stat_dict(path)
        if strip != 0:
            # drop the first *strip* components from the stored path
            entry['path'] = "/".join(path.split("/")[strip:])
        # the constant 0 mirrors the line number slot of index iterators
        yield entry, 0
1206 | ||
b84beea7 PG |
def iterate_disaster_index (self, index):
    """
    Mimick the behavior of the other object iterators, just with the inputs
    supplied directly as *index*.
    """

    class RawIndexIterator(object):
        def __init__(self, delta_tar, index):
            self.delta_tar = delta_tar
            self.index = index
            # prime the underlying iterator right away so the object is
            # usable without an explicit "with" statement
            self.__enter__()

        def __iter__(self):
            return self

        def __enter__(self):
            '''
            Allows this iterator to be used with the "with" statement
            '''
            self.iter = iter(self.index)
            return self

        def __exit__(self, exc_type, exc_value, exc_tb):
            '''
            Allows this iterator to be used with the "with" statement
            '''

        def __next__(self):
            # pair each raw entry with a dummy line number so the shape
            # matches the (entry, l_no) tuples of the other iterators
            return next(self.iter), 0

        def release(self):
            # nothing to clean up: no file descriptor is held
            pass

    return RawIndexIterator(self, index)
1242 | ||
d07c8065 ERE |
def collate_iterators(self, it1, it2):
    '''
    Collate two iterators, so that it returns pairs of the items of each
    iterator (if the items are the same), or (None, elem2) or (elem1, None)
    when there's no match for the items in the other iterator.

    It assumes that the items in both lists are ordered in the same way.
    '''
    # l_no carries the line number reported by it1 (the index iterator);
    # it2 entries contribute no line number of their own
    l_no = 0
    # elem1/elem2 hold an entry fetched but not yet yielded; a non-None
    # value survives into the next loop iteration until it finds a match
    elem1, elem2 = None, None
    while True:
        if not elem1:
            try:
                elem1, l_no = it1.__next__()
            except StopIteration:
                # it1 exhausted: flush the pending elem2, then drain it2
                if elem2:
                    yield (None, elem2, l_no)
                for elem2 in it2:
                    # it2 may yield (entry, l_no) tuples; unwrap them
                    if isinstance(elem2, tuple):
                        elem2 = elem2[0]
                    yield (None, elem2, l_no)
                break
        if not elem2:
            try:
                elem2 = it2.__next__()
                # it2 may yield (entry, l_no) tuples; unwrap them
                if isinstance(elem2, tuple):
                    elem2 = elem2[0]
            except StopIteration:
                # it2 exhausted: flush the pending elem1, then drain it1
                if elem1:
                    yield (elem1, None, l_no)
                for elem1, l_no in it1:
                    yield (elem1, None, l_no)
                break

        # compare by unprefixed path; compare_indexes tells which side(s)
        # sort first and therefore must be emitted now
        index1 = self.unprefixed(elem1['path'])
        index2 = self.unprefixed(elem2['path'])
        i1, i2 = self.compare_indexes(index1, index2)

        yield1 = yield2 = None
        if i1 is not None:
            yield1 = elem1
            elem1 = None   # consumed; fetch a fresh one next round
        if i2 is not None:
            yield2 = elem2
            elem2 = None   # consumed; fetch a fresh one next round
        yield (yield1, yield2, l_no)
1289 | ||
def compare_indexes(self, index1, index2):
    '''
    Compare iterator indexes and return a tuple in the following form:
    if index1 < index2, returns (index1, None)
    if index1 == index2 returns (index1, index2)
    else: returns (None, index2)

    Ordering: a path with fewer components sorts first; ties are broken
    component-wise lexicographically.
    '''
    parts1 = index1.split('/')
    parts2 = index2.split('/')

    # depth decides first: the shallower path sorts first
    if len(parts1) != len(parts2):
        if len(parts1) < len(parts2):
            return (index1, None)
        return (None, index2)

    # equal depth: Python list comparison is component-wise lexicographic,
    # matching the original element-by-element scan
    if parts1 < parts2:
        return (index1, None)
    if parts1 > parts2:
        return (None, index2)
    return (index1, index2)
0708a374 | 1313 | |
def list_backup(self, backup_tar_path, list_func=None):
    '''
    List the members of a backup tar without extracting file payloads.

    Parameters:
    - backup_tar_path: path to the (first volume of the) backup tar.
    - list_func: optional callable invoked once per tar member (tarinfo);
      when None, each member path is logged at INFO level instead.

    Raises Exception when backup_tar_path is not a readable file.
    '''
    if not isinstance(backup_tar_path, str):
        raise Exception('Backup tar path must be a string')

    if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path):
        raise Exception('Source path "%s" does not exist or is not a '\
                        'file' % backup_tar_path)

    if not os.access(backup_tar_path, os.R_OK):
        raise Exception('Source path "%s" is not readable' % backup_tar_path)

    cwd = os.getcwd()

    def new_volume_handler(deltarobj, cwd, backup_path, encryption, tarobj, base_name, volume_number):
        '''
        Handles the new volumes
        '''
        # guess_name because when listing we only know the first volume's
        # name, not the naming scheme parameters used at backup time
        volume_name = deltarobj.volume_name_func(backup_path, True,
            volume_number, guess_name=True)
        volume_path = os.path.join(backup_path, volume_name)

        # we convert relative paths into absolute because CWD is changed
        if not os.path.isabs(volume_path):
            volume_path = os.path.join(cwd, volume_path)
        tarobj.open_volume(volume_path, encryption=encryption)

    # set up the decryptor BEFORE binding it into the handler below --
    # the partial() captures the value of self.decryptor at this point
    if self.decryptor is None:
        self.decryptor = \
            self.initialize_encryption (CRYPTO_MODE_DECRYPT,
                                        strict_validation=False)

    backup_path = os.path.dirname(backup_tar_path)
    if not os.path.isabs(backup_path):
        backup_path = os.path.join(cwd, backup_path)
    new_volume_handler = partial(new_volume_handler, self, cwd, backup_path, self.decryptor)

    tarobj = tarfile.TarFile.open(backup_tar_path,
                                  mode='r' + self.mode,
                                  format=tarfile.GNU_FORMAT,
                                  concat='#' in self.mode,
                                  encryption=self.decryptor,
                                  new_volume_handler=new_volume_handler,
                                  save_to_members=False,
                                  dereference=True)

    # extractall() filter: reports each member and returns False, which
    # presumably tells this project's tarfile fork to skip the actual
    # extraction -- confirm against tarfile.TarFile.extractall
    def filter(cls, list_func, tarinfo):
        if list_func is None:
            self.logger.info(tarinfo.path)
        else:
            list_func(tarinfo)
        return False
    filter = partial(filter, self, list_func)

    tarobj.extractall(filter=filter, unlink=True)
    tarobj.close()
1369 | ||
0708a374 | 1370 | def restore_backup(self, target_path, backup_indexes_paths=[], |
e93f83f1 | 1371 | backup_tar_path=None, restore_callback=None, |
b750b280 PG |
1372 | disaster=tarfile.TOLERANCE_STRICT, backup_index=None, |
1373 | strict_validation=True): | |
0708a374 ERE |
1374 | ''' |
1375 | Restores a backup. | |
1376 | ||
1377 | Parameters: | |
0708a374 ERE |
1378 | - target_path: path to restore. |
1379 | - backup_indexes_paths: path to backup indexes, in descending date order. | |
1380 | The indexes indicate the location of their respective backup volumes, | |
1381 | and multiple indexes are needed to be able to restore diff backups. | |
1382 | Note that this is an optional parameter: if not suplied, it will | |
1383 | try to restore directly from backup_tar_path. | |
1384 | - backup_tar_path: path to the backup tar file. Used as an alternative | |
1385 | to backup_indexes_paths to restore directly from a tar file without | |
1386 | using any file index. If it's a multivol tarfile, volume_name_func | |
1387 | will be called. | |
4da27cfe | 1388 | - restore_callback: callback function to be called during restore. |
b0aef801 | 1389 | This is passed to the helper and gets called for every file. |
11684b1d | 1390 | |
3a7e1a50 | 1391 | NOTE: If you want to use an index to restore a backup, this function |
11684b1d ERE |
1392 | only supports to do so when the tarfile mode is either uncompressed or |
1393 | uses concat compress mode, because otherwise it would be very slow. | |
3a7e1a50 ERE |
1394 | |
1395 | NOTE: Indices are assumed to follow the same format as the index_mode | |
1396 | specified in the constructor. | |
e93f83f1 PG |
1397 | |
1398 | Returns the list of files that could not be restored, if there were | |
1399 | any. | |
0708a374 | 1400 | ''' |
11684b1d | 1401 | # check/sanitize input |
be60ffd0 | 1402 | if not isinstance(target_path, str): |
e5c6ca04 ERE |
1403 | raise Exception('Target path must be a string') |
1404 | ||
11684b1d ERE |
1405 | if backup_indexes_paths is None and backup_tar_path == []: |
1406 | raise Exception("You have to either provide index paths or a tar path") | |
e5c6ca04 | 1407 | |
b84beea7 PG |
1408 | if isinstance (backup_index, list) is True: |
1409 | mode = "disaster" | |
1410 | elif len(backup_indexes_paths) == 0: | |
ea6d3c3e ERE |
1411 | mode = "tar" |
1412 | else: | |
1413 | mode = "diff" | |
1414 | ||
1415 | if mode == "tar": | |
be60ffd0 | 1416 | if not isinstance(backup_tar_path, str): |
11684b1d ERE |
1417 | raise Exception('Backup tar path must be a string') |
1418 | ||
1419 | if not os.path.exists(backup_tar_path) or not os.path.isfile(backup_tar_path): | |
1420 | raise Exception('Source path "%s" does not exist or is not a '\ | |
1421 | 'file' % backup_tar_path) | |
1422 | ||
1423 | if not os.access(backup_tar_path, os.R_OK): | |
1424 | raise Exception('Source path "%s" is not readable' % backup_tar_path) | |
1425 | else: | |
1426 | if not isinstance(backup_indexes_paths, list): | |
1427 | raise Exception('backup_indexes_paths must be a list') | |
1428 | ||
1429 | if self.mode.startswith(':') or self.mode.startswith('|'): | |
1430 | raise Exception('Restore only supports either uncompressed tars' | |
1431 | ' or concat compression when restoring from an index, and ' | |
1432 | ' the open mode you provided is "%s"' % self.mode) | |
1433 | ||
1434 | for index in backup_indexes_paths: | |
be60ffd0 | 1435 | if not isinstance(index, str): |
11684b1d | 1436 | raise Exception('indices must be strings') |
e5c6ca04 | 1437 | |
11684b1d ERE |
1438 | if not os.path.exists(index) or not os.path.isfile(index): |
1439 | raise Exception('Index path "%s" does not exist or is not a '\ | |
1440 | 'file' % index) | |
1441 | ||
1442 | if not os.access(index, os.R_OK): | |
1443 | raise Exception('Index path "%s" is not readable' % index) | |
e5c6ca04 ERE |
1444 | |
1445 | # try to create backup path if needed | |
37ab0f57 | 1446 | os.makedirs(target_path, exist_ok=True) |
e5c6ca04 | 1447 | |
ec57ce53 ERE |
1448 | # make backup_tar_path absolute so that iterate_tar_path works fine |
1449 | if backup_tar_path and not os.path.isabs(backup_tar_path): | |
1450 | backup_tar_path = os.path.abspath(backup_tar_path) | |
1451 | ||
d5361dac | 1452 | cwd = os.getcwd() |
ec57ce53 | 1453 | os.chdir(target_path) |
d5361dac | 1454 | |
2ae46844 | 1455 | # setup for decrypting payload |
774ca538 | 1456 | if self.decryptor is None: |
b750b280 PG |
1457 | self.decryptor = \ |
1458 | self.initialize_encryption (CRYPTO_MODE_DECRYPT, | |
1459 | strict_validation=strict_validation) | |
2ae46844 | 1460 | |
ea6d3c3e | 1461 | if mode == 'tar': |
24ddf0a2 ERE |
1462 | index_it = self.iterate_tar_path(backup_tar_path) |
1463 | helper = RestoreHelper(self, cwd, backup_path=backup_tar_path, | |
ec57ce53 | 1464 | tarobj=index_it.tar_obj) |
ea6d3c3e | 1465 | elif mode == "diff": |
04f4c7ab PG |
1466 | helper = RestoreHelper(self, cwd, backup_indexes_paths, |
1467 | disaster=disaster) | |
f3d10816 PG |
1468 | try: |
1469 | # get iterator from newest index at _data[0] | |
1470 | index1 = helper._data[0]["path"] | |
b750b280 PG |
1471 | index_it = \ |
1472 | self.iterate_index_path(index1, | |
1473 | strict_validation=strict_validation) | |
f3d10816 PG |
1474 | except tarfile.DecryptionError as exn: |
1475 | self.logger.error("failed to decrypt file [%s]: %s; is this an " | |
afc87ebc PG |
1476 | "actual encrypted index file?" |
1477 | % (index1, str (exn))) | |
1478 | return [(index1, exn)] | |
1479 | except Exception as exn: | |
1480 | # compressed files | |
1481 | self.logger.error("failed to read file [%s]: %s; is this an " | |
1482 | "actual index file?" % (index1, str (exn))) | |
f3d10816 | 1483 | return [(index1, exn)] |
b84beea7 PG |
1484 | elif mode == "disaster": |
1485 | index_it = self.iterate_disaster_index (backup_index) | |
65b35c42 PG |
1486 | helper = RestoreHelper (self, cwd, backup_path=backup_tar_path, |
1487 | backup_index=backup_index, | |
1488 | disaster=disaster) | |
b84beea7 | 1489 | |
b750b280 | 1490 | index_decryptor = helper._data[0]["decryptor"] |
d07c8065 | 1491 | |
24ddf0a2 ERE |
1492 | dir_it = self._recursive_walk_dir('.') |
1493 | dir_path_it = self.jsonize_path_iterator(dir_it) | |
11684b1d | 1494 | |
e93f83f1 PG |
1495 | failed = [] # irrecoverable files |
1496 | ||
a395759e | 1497 | # for each file to be restored, do: |
24ddf0a2 ERE |
1498 | for ipath, dpath, l_no in self.collate_iterators(index_it, dir_path_it): |
1499 | if not ipath: | |
1500 | upath = dpath['path'] | |
1501 | op_type = dpath['type'] | |
1502 | else: | |
1503 | upath = self.unprefixed(ipath['path']) | |
1504 | op_type = ipath['type'] | |
42c04ead | 1505 | |
24ddf0a2 | 1506 | # filter paths |
75059f3c | 1507 | if self.filter_path(upath, '', op_type == 'directory') == NO_MATCH: |
24ddf0a2 | 1508 | continue |
ea6d3c3e | 1509 | |
24ddf0a2 ERE |
1510 | # if types of the file mismatch, the file needs to be deleted |
1511 | # and re-restored | |
1512 | if ipath is not None and dpath is not None and\ | |
1513 | dpath['type'] != ipath['type']: | |
1514 | helper.delete(upath) | |
1515 | ||
1516 | # if file not found in dpath, we can directly restore from index | |
1517 | if not dpath: | |
1518 | # if the file doesn't exist and it needs to be deleted, it | |
1519 | # means that work is already done | |
1520 | if ipath['path'].startswith('delete://'): | |
ea6d3c3e | 1521 | continue |
24ddf0a2 | 1522 | try: |
b008f989 | 1523 | self.logger.debug("restore %s" % ipath['path']) |
4da27cfe | 1524 | helper.restore(ipath, l_no, restore_callback) |
be60ffd0 | 1525 | except Exception as e: |
e93f83f1 | 1526 | iipath = ipath.get ("path", "") |
7b07645e | 1527 | self.logger.error("FAILED to restore: {} ({})" |
e93f83f1 | 1528 | .format(iipath, e)) |
04f4c7ab | 1529 | if disaster != tarfile.TOLERANCE_STRICT: |
e93f83f1 | 1530 | failed.append ((iipath, e)) |
24ddf0a2 | 1531 | continue |
11684b1d | 1532 | |
24ddf0a2 ERE |
1533 | # if both files are equal, we have nothing to restore |
1534 | if self._equal_stat_dicts(ipath, dpath, listsnapshot_equal=True): | |
1535 | continue | |
1536 | ||
1537 | # we have to restore the file, but first we need to delete the | |
1538 | # current existing file. | |
1539 | # we don't delete the file if it's a directory, because it might | |
1540 | # just have changed mtime, so it's quite inefficient to remove | |
1541 | # it | |
1542 | if ipath: | |
1543 | if ipath['type'] != 'directory' or ipath['path'].startswith('delete://'): | |
42c04ead | 1544 | helper.delete(upath) |
b008f989 | 1545 | self.logger.debug("restore %s" % ipath['path']) |
e93f83f1 PG |
1546 | try: |
1547 | helper.restore(ipath, l_no, restore_callback) | |
1548 | except Exception as e: | |
04f4c7ab | 1549 | if disaster == tarfile.TOLERANCE_STRICT: |
e93f83f1 PG |
1550 | raise |
1551 | failed.append ((ipath.get ("path", ""), e)) | |
1552 | continue | |
24ddf0a2 ERE |
1553 | |
1554 | # if the file is not in the index (so it comes from the target | |
1555 | # directory) then we have to delete it | |
1556 | else: | |
c9d47a03 | 1557 | self.logger.debug("delete %s" % upath) |
24ddf0a2 | 1558 | helper.delete(upath) |
42c04ead | 1559 | |
ec57ce53 ERE |
1560 | helper.restore_directories_permissions() |
1561 | index_it.release() | |
1562 | os.chdir(cwd) | |
1563 | helper.cleanup() | |
ea6d3c3e | 1564 | |
e93f83f1 PG |
1565 | return failed |
1566 | ||
1567 | ||
1568 | def recover_backup(self, target_path, backup_indexes_paths=[], | |
1569 | restore_callback=None): | |
1570 | """ | |
1571 | Walk the index, extracting objects in disaster mode. Bad files are | |
1572 | reported along with a reason. | |
1573 | """ | |
1574 | return self.restore_backup(target_path, | |
1575 | backup_indexes_paths=backup_indexes_paths, | |
b750b280 PG |
1576 | disaster=tarfile.TOLERANCE_RECOVER, |
1577 | strict_validation=False) | |
04f4c7ab PG |
1578 | |
1579 | ||
6690f5e0 | 1580 | def rescue_backup(self, target_path, backup_tar_path, |
04f4c7ab PG |
1581 | restore_callback=None): |
1582 | """ | |
1583 | More aggressive “unfsck” mode: do not rely on the index data as the | |
1584 | files may be corrupt; skim files for header-like information and | |
1585 | attempt to retrieve the data. | |
1586 | """ | |
27ee4dd4 PG |
1587 | def gen_volume_name (nvol): |
1588 | return os.path.join (os.path.dirname (backup_tar_path), | |
1589 | self.volume_name_func (backup_tar_path, | |
1590 | True, | |
1591 | nvol)) | |
1592 | ||
1593 | backup_index = tarfile.gen_rescue_index (gen_volume_name, | |
1594 | self.mode, | |
1595 | password=self.password, | |
1596 | key=self.crypto_key) | |
6690f5e0 | 1597 | |
04f4c7ab | 1598 | return self.restore_backup(target_path, |
b84beea7 | 1599 | backup_index=backup_index, |
65b35c42 | 1600 | backup_tar_path=backup_tar_path, |
b750b280 PG |
1601 | disaster=tarfile.TOLERANCE_RESCUE, |
1602 | strict_validation=False) | |
e93f83f1 PG |
1603 | |
1604 | ||
11684b1d ERE |
1605 | def _parse_json_line(self, f, l_no): |
1606 | ''' | |
ee0e095f | 1607 | Read line from file like object and process it as JSON. |
11684b1d ERE |
1608 | ''' |
1609 | l = f.readline() | |
1610 | l_no += 1 | |
1611 | try: | |
be60ffd0 | 1612 | j = json.loads(l.decode('UTF-8')) |
ee0e095f PG |
1613 | except UnicodeDecodeError as e: |
1614 | if tuple (l [0:2]) == tarfile.GZ_MAGIC: | |
1615 | raise Exception \ | |
1616 | ("error parsing line #%d as json: looks like a compressed file (%d B: [%s..])" | |
1617 | % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \ | |
1618 | from e | |
1619 | raise Exception \ | |
1620 | ("error parsing line #%d as json: not a text file (%d B: [%s..])" | |
1621 | % (l_no, len (l), binascii.hexlify (l [:16]).decode ())) \ | |
1622 | from e | |
be60ffd0 | 1623 | except ValueError as e: |
11684b1d ERE |
1624 | raise Exception("error parsing this json line " |
1625 | "(line number %d): %s" % (l_no, l)) | |
1626 | return j, l_no | |
ea6d3c3e | 1627 | |
24ddf0a2 | 1628 | |
ea6d3c3e ERE |
class RestoreHelper(object):
    '''
    Class used to help to restore files from indices.

    One state dict per backup source is kept in ``_data`` (newest first);
    volume file descriptors and tar objects are cached there across
    individual file restores so consecutive extractions from the same
    volume avoid reopening it.
    '''

    # holds the dicts of data (class-level default; every instance gets a
    # fresh list in __init__)
    _data = []

    # the DeltaTar instance this helper restores for
    _deltatar = None

    # working directory at the time the restore started; used to absolutize
    # relative paths because the restore code chdirs around
    _cwd = None

    # list of directories to be restored. This is done as a last step, see
    # tarfile.extractall for details.
    _directories = []

    # error-tolerance mode, one of the tarfile.TOLERANCE_* constants
    _disaster = tarfile.TOLERANCE_STRICT

    def __init__(self, deltatar, cwd, index_list=None, backup_path=False,
                 backup_index=None, tarobj=None,
                 disaster=tarfile.TOLERANCE_STRICT):
        '''
        Constructor opens the tars and init the data structures.

        Assumptions:

        -   Index list must be provided in reverse order (newer first).
        -   “newer first” apparently means that if there are n backups
            provided, the last full backup is at index n-1 and the most recent
            diff backup is at index 0.
        -   Only the first, the second, and the last elements of
            ``index_list`` are relevant, others will not be accessed.
        -   If no ``index_list`` is provided, both ``tarobj`` and
            ``backup_path`` must be passed.
        -   If ``index_list`` is provided, the values of ``tarobj`` and
            ``backup_path`` are ignored.

        Three mutually exclusive setups are distinguished below, in order:
        ``backup_index`` as a list (disaster/rescue mode), ``index_list``
        (index-driven diff/full restore), or neither (direct tar mode).
        NOTE(review): the ``backup_path=False`` default looks odd (a path or
        None would be expected) — callers in tar/disaster mode always pass a
        real path; confirm before relying on the default.
        '''
        self._data = []
        self._directories = []
        self._deltatar = deltatar
        self._cwd = cwd
        self._password = deltatar.password
        self._crypto_key = deltatar.crypto_key
        self._decryptors = []
        self._disaster = disaster

        # Disable strict checking for linearly increasing IVs when running
        # in rescue or recover mode.
        strict_validation = disaster == tarfile.TOLERANCE_STRICT

        # grp/pwd are POSIX-only; fall back to None on platforms without them
        try:
            import grp, pwd
        except ImportError:
            grp = pwd = None

        # ownership can only be restored when running as root
        if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
            self.canchown = True
        else:
            self.canchown = False

        if isinstance (backup_index, list) is True:
            # disaster/rescue mode: a pre-built (possibly synthetic) index
            # was handed in; reuse the deltatar-level decryptor
            decryptor = self._deltatar.decryptor
            self._data = \
                [{ "curr_vol_no" : None
                 , "vol_fd" : None
                 , "offset" : -1
                 , "tarobj" : None
                 , "path" : backup_path
                 , "is_full" : True
                 , "iterator" : None
                 , "last_itelement" : None
                 , "last_lno" : 0
                 , "new_volume_handler" :
                        partial(self.new_volume_handler,
                                self._deltatar, self._cwd, True,
                                os.path.dirname(backup_path), decryptor)
                 , "decryptor" : decryptor
                 }]
        elif index_list is not None:
            # index-driven mode: one state dict per index file, newest first;
            # by assumption the last entry is the full backup
            for index in index_list:
                is_full = index == index_list[-1]

                # each index gets its own Decrypt object because decryption
                # state (IV tracking) is per-archive
                decryptor = None
                if self._password is not None:
                    decryptor = crypto.Decrypt (password=self._password,
                                                key=self._crypto_key,
                                                strict_ivs=strict_validation)

                # make paths absolute to avoid cwd problems
                if not os.path.isabs(index):
                    index = os.path.normpath(os.path.join(cwd, index))

                s = dict(
                    curr_vol_no = None,
                    vol_fd = None,
                    offset = -1,
                    tarobj = None,
                    path = index,
                    is_full = is_full,
                    iterator = None,
                    last_itelement = None,
                    last_lno = 0,
                    new_volume_handler = partial(self.new_volume_handler,
                        self._deltatar, self._cwd, is_full,
                        os.path.dirname(index), decryptor),
                    decryptor = decryptor
                )
                self._data.append(s)
        else:
            # direct tar mode: restore straight from an already-open tarobj
            # make paths absolute to avoid cwd problems
            if not os.path.isabs(backup_path):
                backup_path = os.path.normpath(os.path.join(cwd, backup_path))

            # update the new_volume_handler of tar_obj
            tarobj.new_volume_handler = partial(self.new_volume_handler,
                self._deltatar, self._cwd, True, os.path.dirname(backup_path),
                self._deltatar.decryptor)
            s = dict(
                curr_vol_no = None,
                vol_fd = None,
                offset = -1,
                tarobj = tarobj,
                path = backup_path,
                is_full = True,
                iterator = None,
                last_itelement = None,
                last_lno = 0,
                new_volume_handler = tarobj.new_volume_handler,
                decryptor = self._deltatar.decryptor
            )
            self._data.append(s)


    def cleanup(self):
        '''
        Closes all open files
        '''
        # close and clear both the raw volume fds and the tar objects so the
        # helper can be discarded without leaking descriptors
        for data in self._data:
            if data['vol_fd']:
                data['vol_fd'].close()
                data['vol_fd'] = None
            if data['tarobj']:
                data['tarobj'].close()
                data['tarobj'] = None

    def delete(self, path):
        '''
        Delete a file

        Removes *path* (recursively for real directories) while preserving
        the parent directory's mtime. Missing paths are silently ignored.
        '''
        if not os.path.exists(path):
            return

        # to preserve parent directory mtime, we save it
        parent_dir = os.path.dirname(path) or os.getcwd()
        parent_dir_mtime = int(os.stat(parent_dir).st_mtime)

        # symlinks to directories must be unlinked, not rmtree'd, so the
        # link target survives
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            os.unlink(path)

        # now we restore parent_directory mtime
        os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))

    def restore(self, itpath, l_no, callback=None):
        '''
        Restore the path from the appropriate backup. Receives the current path
        from the newest (=first) index iterator. itpath must be not null.
        callback is a custom function that gets called for every file.

        NB: This function takes the attribute ``_data`` as input but will only
        ever use its first and, if available, second element. Anything else in
        ``._data[]`` will be ignored.
        '''
        path = itpath['path']

        # Calls the callback function
        if callback:
            callback()

        if path.startswith('delete://'):
            # the file has previously been deleted already in restore_backup in
            # all cases so we just need to finish
            return

        # get data from newest index (_data[0])
        data = self._data[0]
        upath = self._deltatar.unprefixed(path)

        # to preserve parent directory mtime, we save it
        parent_dir = os.path.dirname(upath) or os.getcwd()
        os.makedirs(parent_dir, exist_ok=True)
        parent_dir_mtime = int(os.stat(parent_dir).st_mtime)

        # if path is found in the newest index as to be snapshotted, deal with it
        # and finish
        if path.startswith('snapshot://'):
            self.restore_file(itpath, data, path, l_no, upath)

            # now we restore parent_directory mtime
            os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
            return

        # we go from index to index, finding the path in the index, then finding
        # the index with the most recent snapshot of the file being restored
        #
        # Right now we support diff backups, only. No incremental backups.
        # As a result _data[0] is always the diff backup index
        # and _data[1] the full backup index.
        if len(self._data) == 2:
            data = self._data[1]
            d, l_no, dpath = self.find_path_in_index(data, upath)
            if not d:
                self._deltatar.logger.warning('Error restoring file %s from '
                    'index, not found in index %s' % (path, data['path']))
                return

            cur_path = d.get('path', '')
            if cur_path.startswith('delete://'):
                self._deltatar.logger.warning(('Strange thing happened, file '
                    '%s was listed in first index but deleted by another '
                    'one. Path was ignored and untouched.') % path)
                return
            elif cur_path.startswith('snapshot://'):
                # this code path is reached when the file is unchanged
                # in the newest index and therefore of type 'list://'
                self.restore_file(d, data, path, l_no, dpath)

                # now we restore parent_directory mtime
                os.utime(parent_dir, (parent_dir_mtime, parent_dir_mtime))
                return

        # error code path is reached when:
        # a) we have more than two indexes (unsupported atm)
        # b) both indexes contain a list:// entry (logic error)
        # c) we have just one index and it also contains list://
        self._deltatar.logger.warning(('Error restoring file %s from index, '
            'snapshot not found in any index') % path)

    def find_path_in_index(self, data, upath):
        '''
        Advance the (resumable) iterator of index ``data`` looking for the
        unprefixed path ``upath``.

        Returns ``(entry_dict, line_number, unprefixed_entry_path)`` on a
        hit, or ``(None, 0, '')`` when the path cannot be in this index.
        The iterator position is remembered in ``data['last_itelement']`` /
        ``data['last_lno']`` so subsequent lookups (indices are walked in
        sorted order) continue where the previous one stopped.
        '''
        # NOTE: we restart the iterator sometimes because the iterator can be
        # walked over completely multiple times, for example if one path if not
        # found in one index and we have to go to the next index.
        it = data['iterator']
        if it is None:
            it = data['iterator'] = self._deltatar.iterate_index_path(data["path"])
            d, l_no = it.__next__()
        else:
            # resume from the element the previous search stopped at
            d = data['last_itelement']
            l_no = data['last_lno']

        while True:
            dpath = self._deltatar.unprefixed(d.get('path', ''))
            if upath == dpath:
                data['last_itelement'] = d
                data['last_lno'] = l_no
                return d, l_no, dpath

            up, dp = self._deltatar.compare_indexes(upath, dpath)
            # any time upath should have appeared before current dpath, it means
            # upath is just not in this index and we should stop
            if dp is None:
                data['last_itelement'] = d
                data['last_lno'] = l_no
                return None, 0, ''

            try:
                d, l_no = it.__next__()
            except StopIteration:
                # index exhausted without a match
                data['last_itelement'] = d
                data['last_lno'] = l_no
                return None, 0, ''

    def restore_directories_permissions(self):
        '''
        Restore directory permissions when everything have been restored

        Applied deepest-first (reverse name sort) so fixing a parent's
        restrictive mode cannot block work inside its children.
        '''
        try:
            import grp, pwd
        except ImportError:
            grp = pwd = None

        self._directories.sort(key=operator.attrgetter('name'))
        self._directories.reverse()

        # Set correct owner, mtime and filemode on directories.
        for member in self._directories:
            dirpath = member.name
            try:
                os.chmod(dirpath, member.mode)
                os.utime(dirpath, (member.mtime, member.mtime))
                if self.canchown:
                    # We have to be root to do so.
                    # Resolve names first; fall back to numeric ids when the
                    # name is unknown on this system.
                    try:
                        g = grp.getgrnam(member.gname)[2]
                    except KeyError:
                        g = member.gid
                    try:
                        u = pwd.getpwnam(member.uname)[2]
                    except KeyError:
                        u = member.uid
                    try:
                        # member.issym is a boolean stored by add_member_dir
                        # (already the result of TarInfo.issym())
                        if member.issym and hasattr(os, "lchown"):
                            os.lchown(dirpath, u, g)
                        else:
                            os.chown(dirpath, u, g)
                    except EnvironmentError:
                        raise tarfile.ExtractError("could not change owner")

            except tarfile.ExtractError as e:
                self._deltatar.logger.warning('tarfile: %s' % e)

    @staticmethod
    def new_volume_handler(deltarobj, cwd, is_full, backup_path, decryptor, tarobj, base_name, volume_number):
        '''
        Set up a new volume and perform the tasks necessary for transitioning
        to the next one.

        The first five parameters are pre-bound with functools.partial in
        __init__; tarfile supplies (tarobj, base_name, volume_number) when a
        volume boundary is hit during extraction.
        '''
        volume_name = deltarobj.volume_name_func(backup_path, is_full,
            volume_number, guess_name=True)
        volume_path = os.path.join(backup_path, volume_name)

        # we convert relative paths into absolute because CWD is changed
        if not os.path.isabs(volume_path):
            volume_path = os.path.join(cwd, volume_path)

        tarobj.open_volume(volume_path, encryption=decryptor)

    def restore_file(self, file_data, index_data, path, l_no, unprefixed_path):
        '''
        Restores a snapshot of a file from a specific backup

        ``file_data`` is the index entry; ``index_data`` is the per-backup
        state dict whose cached vol_fd/tarobj are reused and updated here.
        '''
        op_type = file_data.get('type', -1)
        member = file_data.get('member', None)
        ismember = bool(member)

        # when member is set, then we can assume everything is right and we
        # just have to restore the path
        if member is None:
            vol_no = file_data.get('volume', -1)
            # sanity check
            # NOTE(review): on failure this only warns and carries on with a
            # bogus volume number — presumably later opens fail loudly;
            # confirm this is intentional.
            if not isinstance(vol_no, int) or vol_no < 0:
                self._deltatar.logger.warning('unrecognized type to be restored: '
                    '%s, line %d' % (op_type, l_no))

            # setup the volume that needs to be read. only needed when member is
            # not set
            if index_data['curr_vol_no'] != vol_no:
                index_data['curr_vol_no'] = vol_no
                backup_path = os.path.dirname(index_data['path'])
                vol_name = self._deltatar.volume_name_func(backup_path,
                    index_data['is_full'], vol_no, guess_name=True)
                vol_path = os.path.join(backup_path, vol_name)
                if index_data['vol_fd']:
                    index_data['vol_fd'].close()
                index_data['vol_fd'] = open(vol_path, 'rb')

                # force reopen of the tarobj because of new volume
                if index_data['tarobj']:
                    index_data['tarobj'].close()
                    index_data['tarobj'] = None

            # seek tarfile if needed
            offset = file_data.get('offset', -1)
            if index_data['tarobj']:
                if self._disaster == tarfile.TOLERANCE_RESCUE:
                    # force a seek and reopen
                    index_data['tarobj'].close()
                    index_data['tarobj'] = None
                else:
                    # optimistic path: if the next member in the already-open
                    # tarobj is exactly the one we want, avoid the reopen
                    try:
                        member = index_data['tarobj'].__iter__().__next__()
                    except tarfile.DecryptionError:
                        pass
                    except tarfile.CompressionError:
                        pass

                    if not member or member.path != file_data['path']:
                        # force a seek and reopen
                        index_data['tarobj'].close()
                        index_data['tarobj'] = None


            # open the tarfile if needed
            if not index_data['tarobj']:
                index_data['vol_fd'].seek(offset)
                index_data['tarobj'] = tarfile.open(mode="r" + self._deltatar.mode,
                    fileobj=index_data['vol_fd'],
                    format=tarfile.GNU_FORMAT,
                    concat='#' in self._deltatar.mode,
                    encryption=index_data["decryptor"],
                    new_volume_handler=index_data['new_volume_handler'],
                    save_to_members=False,
                    tolerance=self._disaster)

                member = index_data['tarobj'].__iter__().__next__()

        # strip the snapshot:// style prefix before writing to disk
        member.path = unprefixed_path
        member.name = unprefixed_path

        if op_type == 'directory':
            # final perms/ownership are applied later in
            # restore_directories_permissions; extract with 0700 for now so
            # children can be written regardless of the stored mode
            self.add_member_dir(member)
            member = copy.copy(member)
            member.mode = 0o0700

            # if it's an existing directory, we then don't need to recreate it
            # just set the right permissions, mtime and that kind of stuff
            if os.path.exists(member.path):
                return

        if not ismember:
            # set current volume number in tarobj, otherwise the extraction of the
            # file might fail when trying to extract a multivolume member
            index_data['tarobj'].volume_number = index_data['curr_vol_no']

        def ignore_symlink (member, *_args):
            # symlinks are skipped (with a warning) instead of extracted
            self._deltatar.logger.warning("Ignoring symlink %s" % member.name)

        # finally, restore the file
        index_data['tarobj'].extract(member, symlink_cb=ignore_symlink,
                                     unlink=True)

    def add_member_dir(self, member):
        '''
        Add member dir to be restored at the end

        Ownership fields are only recorded when we can actually chown
        (i.e. running as root); otherwise just name/mode/mtime are kept.
        '''
        if not self.canchown:
            self._directories.append(DirItem(name=member.name, mode=member.mode,
                mtime=member.mtime))
        else:
            self._directories.append(DirItem(name=member.name, mode=member.mode,
                mtime=member.mtime, gname=member.gname, uname=member.uname,
                uid=member.uid, gid=member.gid, issym=member.issym()))
253d4cdd ERE |
2062 | |
class DirItem(object):
    '''
    Plain attribute bag describing a directory whose metadata (mode, mtime,
    ownership) is to be re-applied at the end of the restore.
    '''
    def __init__(self, **kwargs):
        # adopt every keyword argument as an instance attribute
        self.__dict__.update(kwargs)