fix access race when traversing the filesystem
author Philipp Gesang <philipp.gesang@intra2net.com>
Wed, 4 Jul 2018 08:29:09 +0000 (10:29 +0200)
committer Thomas Jarosch <thomas.jarosch@intra2net.com>
Sat, 1 Feb 2020 14:14:06 +0000 (15:14 +0100)
Related to issue #6440.

Yet another race caused by mishandling of access(3); fix by
acquiring an fd to the directory and using that for iteration.
Handle ENOENT around open(2) and throw out the call to
os.path.exists().

deltatar/deltatar.py

index e66c687..057c98f 100644 (file)
@@ -427,6 +427,10 @@ class DeltaTar(object):
     def _recursive_walk_dir(self, source_path, keep_base_dir=False):
         '''
         Walk a directory recursively, yielding each file/directory
+
+        Returns the path of an entity. If ``keep_base_dir`` is set,
+        the path returned contains the prefix ``source_path``; otherwise it is
+        relative to the prefix.
         '''
 
         source_path = source_path.rstrip(os.sep)
@@ -441,25 +445,28 @@ class DeltaTar(object):
         while queue:
             cur_path = queue.pop(0)
 
-            # it might have been removed in the mean time
-            if not os.path.exists(cur_path):
+            dfd = os.open (cur_path, os.O_DIRECTORY)
+            if dfd == -1: # it might have been removed in the meantime
                 continue
 
-            for filename in sorted(os.listdir(cur_path)):
-                child = os.path.join(cur_path, filename)
-                is_dir = os.path.isdir(child)
-                status = self.filter_path(child, source_path, is_dir)
-                if status == NO_MATCH:
-                    continue
-                if not os.access(child, os.R_OK):
-                    self.logger.warning('Error accessing possibly locked file %s' % child)
-                    continue
-
-                if status == MATCH:
-                    yield child[beginning_size:]
-
-                if is_dir and (status == MATCH or status == PARENT_MATCH):
-                    queue.append(child)
+            try:
+                for filename in sorted(os.listdir(dfd)):
+                    child = os.path.join(cur_path, filename)
+                    is_dir = os.path.isdir(child)
+                    status = self.filter_path(child, source_path, is_dir)
+                    if status == NO_MATCH:
+                        continue
+                    if not os.access(child, os.R_OK):
+                        self.logger.warning('Error accessing possibly locked file %s' % child)
+                        continue
+
+                    if status == MATCH:
+                        yield child[beginning_size:]
+
+                    if is_dir and (status == MATCH or status == PARENT_MATCH):
+                        queue.append(child)
+            finally:
+                os.close (dfd)
 
     def _stat_dict(self, path):
         '''