implementing filter_func in deltatar
author Eduardo Robles Elvira <edulix@wadobo.com>
Wed, 31 Jul 2013 15:58:28 +0000 (17:58 +0200)
committer Eduardo Robles Elvira <edulix@wadobo.com>
Wed, 31 Jul 2013 15:58:40 +0000 (17:58 +0200)
deltatar/deltatar.py

index 489523f..c7eea43 100644 (file)
@@ -22,6 +22,7 @@ import logging
 import datetime
 import binascii
 import os
+import re
 import stat
 import json
 from functools import partial
@@ -190,6 +191,77 @@ class DeltaTar(object):
 
         return "%s-%s-%03d.tar%s" % (prefix, date_str, volume_number + 1, extension)
 
+    def filter_path(self, path, source_path=""):
+        '''
+        Decide whether a path is accepted, using the filtering properties
+        set in the constructor.
+
+        The first len(source_path) characters are dropped from path before
+        any comparison, so the patterns and filter_func see that remainder.
+
+        The filtering order is:
+        1. included_files (if any): the path must match at least one entry
+        2. excluded_files: the path must not match any entry
+        3. filter_func (which must return whether the file is accepted or not)
+
+        Entries can be plain strings or compiled regular expressions. A string
+        matches the path itself and, taken as a directory, everything below
+        it; a regular expression is applied with match(). Entries of any other
+        type are reported through self.logger and never match.
+
+        Returns False when the path is rejected by the include/exclude lists,
+        the result of filter_func when one is set, and True otherwise.
+        '''
+        if len(path) > 0:
+            path = path[len(source_path):]
+
+        # 1. filter included_files: one matching entry is enough to go on
+        if len(self.included_files) > 0:
+            included = False
+            for i in self.included_files:
+                matched = self._match_pattern(i, path)
+                if matched is None:
+                    self.logger.warn('Invalid pattern in included_files: %s' % str(i))
+                elif matched:
+                    included = True
+                    break
+
+            if not included:
+                return False
+
+        # 2. filter excluded_files; this runs whether or not included_files
+        # is set, and any matching entry rejects the path
+        for e in self.excluded_files:
+            matched = self._match_pattern(e, path)
+            if matched is None:
+                self.logger.warn('Invalid pattern in excluded_files: %s' % str(e))
+            elif matched:
+                return False
+
+        # 3. the user supplied callback has the last word
+        if self.filter_func:
+            return self.filter_func(path)
+
+        return True
+
+    @staticmethod
+    def _match_pattern(pattern, path):
+        '''
+        Check a single included_files/excluded_files entry against a path.
+
+        Returns True or False for string and compiled regexp entries, and
+        None when the entry is of an unsupported type.
+        '''
+        if isinstance(pattern, str):
+            # an exact match works for files and directories alike
+            if pattern == path:
+                return True
+
+            # otherwise treat the entry as a directory, adding the trailing
+            # slash if it is missing, and check if the path lives below it
+            if not pattern.endswith('/'):
+                pattern = pattern + '/'
+            return path.startswith(pattern)
+
+        # the compiled pattern type has no stable public name across python
+        # versions, so take it from an actual compiled pattern
+        if isinstance(pattern, type(re.compile(''))):
+            return pattern.match(path) is not None
+
+        return None
+
+
     def _recursive_walk_dir(self, source_path):
         '''
         Walk a directory recursively, yielding each file/directory
@@ -203,6 +275,8 @@ class DeltaTar(object):
             '''
             for filename in os.listdir(dir_path):
                 file_path = os.path.join(dir_path, filename)
+                if not self.filter_path(file_path, source_path):
+                    continue
                 if not os.access(file_path, os.R_OK):
                     self.logger.warn('Error accessing possibly locked file %s' % file_path)
                     continue
@@ -222,9 +296,14 @@ class DeltaTar(object):
 
             if delayed_path_stack:
                 for delayed_path in delayed_path_stack:
+                    if not self.filter_path(delayed_path, source_path):
+                        continue
                     yield delayed_path
                 del delayed_path_stack[:]
 
+            if not self.filter_path(cur_path, source_path):
+                continue
+
             yield cur_path
 
             if os.path.isdir(cur_path):