def filter_path(self, path, source_path=""):
    '''
    Decide whether a path is accepted by the filtering properties of this
    object (self.included_files, self.excluded_files, self.filter_func).

    If path starts with source_path, that prefix is removed first and all
    the checks below (and filter_func) see the remaining part. No separator
    is added or removed: whether the remainder starts with '/' depends on
    whether source_path ends with one.

    The filtering order is:
    1. included_files: if the list is not empty, the path must match at
       least one of its entries, otherwise it is rejected.
    2. excluded_files: the path is rejected if it matches any entry.
    3. filter_func (if set): its return value says whether the path is
       accepted or not, and is returned as is.

    An entry of included_files/excluded_files is either:
    - a string: it matches the path itself and anything below it. A
      trailing '/' is optional ("dir" and "dir/" both match "dir/file").
    - a compiled regular expression: it matches if pattern.match(path)
      succeeds (i.e. it is anchored at the start of the path).
    Anything else is reported through self.logger and matches nothing.

    NOTE(review): parent directories of an included path are not accepted
    by themselves (including "a/b" rejects "a"). A caller that prunes a
    directory on a False result will not descend into it - confirm that
    this is what the directory walker expects.

    Returns True (or whatever filter_func returns) if the path is accepted,
    False otherwise.
    '''
    # local import so this method does not depend on a module level one
    import re

    # re._pattern_type does not exist anymore since python 3.7, so the type
    # of compiled patterns is taken from an actual compiled object
    pattern_type = type(re.compile(''))

    # only strip source_path when it really is a prefix, slicing blindly
    # would chop unrelated characters off the path
    if source_path and path.startswith(source_path):
        path = path[len(source_path):]

    def matches(pattern, list_name):
        # it can be either a regexp or a string
        if isinstance(pattern, str):
            if pattern == path:
                return True
            # normalize to exactly one trailing slash so that "dir" and
            # "dir/" behave the same and "dir" does not match "directory"
            directory = pattern if pattern.endswith('/') else pattern + '/'
            return path.startswith(directory)

        if isinstance(pattern, pattern_type):
            return pattern.match(path) is not None

        self.logger.warning('Invalid pattern in %s: %s', list_name, pattern)
        return False

    # 1. filter included_files: one matching entry is enough
    if self.included_files and not any(
            matches(i, 'included_files') for i in self.included_files):
        return False

    # 2. filter excluded_files: one matching entry rejects the path
    if any(matches(e, 'excluded_files') for e in self.excluded_files):
        return False

    # 3. let the user supplied function have the last word
    if self.filter_func:
        return self.filter_func(path)

    return True
delayed_path_stack: for delayed_path in delayed_path_stack: + if not self.filter_path(delayed_path, source_path): + continue yield delayed_path del delayed_path_stack[:] + if not self.filter_path(cur_path, source_path): + continue + yield cur_path if os.path.isdir(cur_path): -- 1.7.1