134 lines
4.2 KiB
Python
134 lines
4.2 KiB
Python
"""
|
|
File _list_ globbing utility
|
|
|
|
This code is based on glob but varies slightly in that it works on a list of files/paths that is passed rather than a
|
|
directory/pathname.
|
|
|
|
This is useful for multi-tiered glob rules or applying glob rules to a known set of files.
|
|
"""
|
|
# -*- coding: utf-8 -*-
|
|
import os
|
|
import re
|
|
from glob import has_magic
|
|
|
|
__all__ = ["glib", "_iglib"]
|
|
|
|
__author__ = "Grant Hulegaard"
|
|
__copyright__ = "Copyright (C) Nginx, Inc. All rights reserved."
|
|
__license__ = ""
|
|
__maintainer__ = "Grant Hulegaard"
|
|
__email__ = "grant.hulegaard@nginx.com"
|
|
|
|
|
|
# Globals
|
|
|
|
# Match functions (for different match types)
|
|
|
|
def _combined_match(file_pathname, regex):
    """
    Apply the compiled pattern to the complete pathname (directory and filename together).

    :param file_pathname: String pathname to test
    :param regex: Compiled regex; only its ``match`` method is used, so the test is anchored at the start only
    :return: Bool
    """
    hit = regex.match(file_pathname)
    return True if hit else False
|
|
|
|
|
|
def _directory_match(file_pathname, regex):
    """
    Apply the compiled pattern to the directory portion of a pathname only.

    :param file_pathname: String pathname to test
    :param regex: Compiled regex; only its ``match`` method is used
    :return: Bool
    """
    # The directory part comes back without its trailing slash, so it is re-appended before matching
    # (directory patterns are written with a trailing '/').
    directory = os.path.dirname(file_pathname) + '/'
    return True if regex.match(directory) else False
|
|
|
|
|
|
def _filename_match(file_pathname, regex):
    """
    Apply the compiled pattern to the final path component (the filename) only.

    :param file_pathname: String pathname to test
    :param regex: Compiled regex; only its ``match`` method is used
    :return: Bool
    """
    filename = os.path.basename(file_pathname)
    return True if regex.match(filename) else False
|
|
|
|
|
|
# Dispatch table: kind of pathname pattern -> function that applies the compiled regex to the
# relevant part of a file pathname (whole path, directory part, or filename part).
PATHNAME_MAP = dict(
    combined=_combined_match,
    directory=_directory_match,
    filename=_filename_match,
)
|
|
|
|
|
|
def glib(file_list, pathname_pattern):
    """
    Filter file_list down to the pathnames that match a pathname pattern and return them as a list.

    This is the eager counterpart of ``_iglib``: it drains that iterator, so the order of file_list is preserved
    and any error raised while validating the pattern is raised from this call.

    The pattern may contain simple shell-style wildcards ('*', '?', '[...]').

    NOTE(review): earlier documentation stated that filenames starting with a dot are not matched by '*' and '?';
    confirm against the regex construction before relying on that.

    :param file_list: List of string pathnames
    :param pathname_pattern: String pathname pattern
    :return: List
    """
    return [file_pathname for file_pathname in _iglib(file_list, pathname_pattern)]
|
|
|
|
|
|
# Helpers
|
|
|
|
def _iglib(file_list, pathname_pattern):
    """
    Return an iterator which yields a subset of the passed file_list matching the pathname pattern.

    The pattern is classified by what ``os.path.split`` finds in it:

    * directory and filename part -> 'combined'  (regex applied to the whole pathname)
    * directory part only         -> 'directory' (regex applied to the directory of each pathname)
    * filename part only          -> 'filename'  (regex applied to the filename of each pathname)

    The pattern may contain simple shell-style wildcards ('*', '?', '[...]').

    NOTE(review): earlier documentation stated that filenames starting with a dot are not matched by '*' and '?';
    nothing in this function special-cases dot-files -- confirm against ``_glib_regex``.

    Because this is a generator, the pattern is validated (and TypeError raised) when iteration starts, not when
    the function is called.

    :param file_list: List of String pathnames
    :param pathname_pattern: String pathname pattern
    :return: Iterator
    :raises TypeError: If pathname_pattern is empty or is not something ``os.path.split`` can handle
    """
    try:
        dirname, tail = os.path.split(pathname_pattern)
    except (TypeError, AttributeError):
        # Not a path-like value (e.g. None or an int).  Only the errors os.path.split raises for bad input are
        # caught so that unrelated exceptions (KeyboardInterrupt, SystemExit, ...) are not swallowed.
        dirname, tail = None, None

    # Set type based on what info was in pathname pattern
    if dirname and tail:
        pathname_type = 'combined'
    elif dirname:
        pathname_type = 'directory'
    elif tail:
        pathname_type = 'filename'
    else:
        raise TypeError('Expected pathname pattern, got "%s" (type: %s)' % (pathname_pattern, type(pathname_pattern)))

    glib_regex = _glib_regex(pathname_pattern)
    # Resolve the match function once rather than on every file.
    matcher = PATHNAME_MAP[pathname_type]

    for file_pathname in file_list:
        if matcher(file_pathname, glib_regex):
            yield file_pathname
|
|
|
|
|
|
def _glib_regex(pathname_pattern):
    """
    Helper for taking pathname patterns and converting them into Python regexes with Unix pathname matching behavior.

    Translation rules, as implemented below:

    * '.' is escaped so it only matches a literal dot.
    * '*' and '?' are kept as regex quantifiers.  When the text in front of one does not already end in a
      character class (']'), the class '[^/]' is inserted so the wildcard cannot cross a directory separator:
      '*' becomes '[^/]*' and '?' becomes '[^/]?'.  After a user supplied class the quantifier applies to that
      class instead ('[0-9]*' stays '[0-9]*').
    * Note that '?' therefore means "zero or one character", not "exactly one" as in a shell glob.
    * Other regex metacharacters are passed through untouched and no end anchor is added.

    Patterns without any wildcard characters are compiled as-is (after escaping '.').

    :param pathname_pattern: String pathname
    :return: Compiled Regex
    """
    # First escape '.'.  str.replace returns a new string, so the result has to be kept; previously it was
    # discarded and '.' silently matched any character.
    pathname_pattern = pathname_pattern.replace('.', r'\.')

    if has_magic(pathname_pattern):
        # Replace unspecific '*' and '?' with regex appropriate specifiers
        for special_char in ('*', '?'):
            pieces = pathname_pattern.split(special_char)
            last = len(pieces) - 1

            rebuilt = []
            for position, piece in enumerate(pieces):
                # Every piece except the last is immediately followed by special_char.  Unless it already ends
                # in a character class, give the quantifier something to apply to.  This also covers an empty
                # piece, i.e. a wildcard at the very start of the pattern or directly after another wildcard.
                # The position comes from enumerate() rather than list.index(), which returned the first
                # occurrence and so mis-handled patterns with repeated segments (e.g. 'a*a').
                if position != last and not piece.endswith(']'):
                    piece += '[^/]'
                rebuilt.append(piece)

            # Rejoin on special characters
            pathname_pattern = special_char.join(rebuilt)

    return re.compile(pathname_pattern)
|
|
|
|
# TODO: Add better variable guarantees via type checking/casting, or unicode spoofing (see glob for reference).
|