# NOTE: is_glob was originally written for oletools and copied from there.


def is_glob(filespec):
    """ determine if given file specification is a single file name or a glob

    python's glob and fnmatch can only interpret ?, *, [list], and [ra-nge],
    the special chars *?[-] can only be escaped using []
    --> file_name is not a glob
    --> file?name is a glob
    --> file* is a glob
    --> file[-._]name is a glob
    --> file[?]name is not a glob (matches literal "file?name")
    --> file[*]name is not a glob (matches literal "file*name")
    --> file[-]name is not a glob (matches literal "file-name")
    --> file-name is not a glob

    Brackets that do not form a character class are literal text for fnmatch
    and are therefore treated as non-globs
    --> file[name is not a glob (no closing bracket)
    --> file]-[name is not a glob (stray "]", then "[" without closing "]")
    --> file[]name is not a glob (a "]" right after "[" belongs to the class,
        so the class is never closed)

    A character class is only regarded as an escape if it contains exactly
    one of the special chars *?[]-; every other class makes the spec a glob
    --> file[[*]name is a glob (class matching "[" or "*")
    --> file[a]name is a glob

    Python's glob also works with globs in directory-part of path
    --> dir-part of path is analyzed just like filename-part
    --> thirdparty/*/xglob.py is a (valid) glob

    :param str filespec: file name or glob pattern to analyze
    :returns: True if filespec contains unescaped glob syntax, else False
    :rtype: bool
    """
    special_chars = '*?[]-'
    length = len(filespec)
    pos = 0

    while pos < length:
        char = filespec[pos]
        pos += 1

        if char in '*?':
            return True
        if char != '[':
            # ordinary character; a "]" outside of a class is literal as well
            continue

        # Locate the closing bracket the same way fnmatch.translate does:
        # a leading "!" negates the class and a "]" directly after the
        # opening (or after the "!") is a member of the class, not its end.
        close = pos
        if close < length and filespec[close] == '!':
            close += 1
        if close < length and filespec[close] == ']':
            close += 1
        close = filespec.find(']', close)

        if close < 0:
            # no closing bracket: "[" is literal, keep scanning behind it
            continue

        content = filespec[pos:close]
        if len(content) != 1 or content not in special_chars:
            # a real character class (list, range or negation)
            return True

        # class only escapes a single special char; skip it completely
        pos = close + 1

    return False