Normalizes the replacement of `<SKIPLINES>` (moved to _resolveHostTag, so it is now replaced together with the other tags);

The regex is compiled as MULTILINE only if needed (buffering with `maxlines` > 1), which makes it possible to:
- improve performance through single-line parsing;
- make the regex more precise, because it distinguishes between the anchors `^`/`$` for the begin/end of the string and the new-line character '\n', e.g. when entries come from filters (like the systemd journal) that allow parsing of log entries containing new-line chars (as a single entry);
pull/1733/head
sebres 2017-03-23 22:02:37 +01:00
parent b650503f00
commit 61c1bdfe79
3 changed files with 25 additions and 11 deletions

View File

@ -103,20 +103,17 @@ class Regex:
# avoid construction of invalid object.
# @param value the regular expression
def __init__(self, regex, **kwargs):
def __init__(self, regex, multiline=False, **kwargs):
self._matchCache = None
# Perform shortcuts expansions.
# Resolve "<HOST>" tag using default regular expression for host:
# Replace standard f2b-tags (like "<HOST>", etc) using default regular expressions:
regex = Regex._resolveHostTag(regex, **kwargs)
# Replace "<SKIPLINES>" with regular expression for multiple lines.
regexSplit = regex.split("<SKIPLINES>")
regex = regexSplit[0]
for n, regexLine in enumerate(regexSplit[1:]):
regex += "\n(?P<skiplines%i>(?:(.*\n)*?))" % n + regexLine
#
if regex.lstrip() == '':
raise RegexException("Cannot add empty regex")
flags = re.MULTILINE if (multiline or "\n" in regex or r"\n" in regex) else 0
try:
self._regexObj = re.compile(regex, re.MULTILINE)
self._regexObj = re.compile(regex, flags)
self._regex = regex
except sre_constants.error:
raise RegexException("Unable to compile regular expression '%s'" %
@ -125,6 +122,11 @@ class Regex:
def __str__(self):
"""Return "<class name>(<repr of the regex string>)" for this object."""
return "%s(%r)" % (self.__class__.__name__, self._regex)
@property
def flags(self):
"""Return the flags of the compiled regex object (`self._regexObj.flags`).

The value can be tested against `re` constants, e.g. `flags & re.MULTILINE`.
"""
return self._regexObj.flags
##
# Replaces "<HOST>", "<IP4>", "<IP6>", "<FID>" with default regular expression for host
#
@ -135,6 +137,9 @@ class Regex:
def _resolveHostTag(regex, useDns="yes"):
openTags = dict()
props = {
'nl': 0, # new lines counter by <SKIPLINES> tag;
}
# tag interpolation callable:
def substTag(m):
tag = m.group()
@ -142,6 +147,11 @@ class Regex:
# 3 groups instead of <HOST> - separated ipv4, ipv6 and host (dns)
if tn == "HOST":
return R_HOST[RI_HOST if useDns not in ("no",) else RI_ADDR]
# replace "<SKIPLINES>" with regular expression for multiple lines (by buffering with maxlines)
if tn == "SKIPLINES":
nl = props['nl']
props['nl'] = nl + 1
return r"\n(?P<skiplines%i>(?:(?:.*\n)*?))" % (nl,)
# static replacement from RH4TAG:
try:
return RH4TAG[tn]

View File

@ -161,10 +161,14 @@ class Filter(JailThread):
# @param value the regular expression
def addFailRegex(self, value):
multiLine = self.getMaxLines() > 1
try:
regex = FailRegex(value, prefRegex=self.__prefRegex, useDns=self.__useDns)
regex = FailRegex(value, prefRegex=self.__prefRegex, multiline=multiLine,
useDns=self.__useDns)
self.__failRegex.append(regex)
if "\n" in regex.getRegex() and not self.getMaxLines() > 1:
regexExpr = regex.getRegex()
# check new lines present in regex (was compiled as multiline), incorrect by `maxlines=1`:
if (regex.flags & re.MULTILINE) and not multiLine:
logSys.warning(
"Mutliline regex set for jail %r "
"but maxlines not greater than 1", self.jailName)

View File

@ -41,7 +41,7 @@ TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), "files")
# regexp to detect a greedy catch-all that should be non-greedy:
RE_HOST = Regex('<HOST>').getRegex()
RE_WRONG_GREED = re.compile(r'\.[+\*](?!\?).*' + re.escape(RE_HOST) + r'.*(?:\.[+\*].*|[^\$])$')
RE_WRONG_GREED = re.compile(r'\.[+\*](?!\?)[^\$\^]*' + re.escape(RE_HOST) + r'.*(?:\.[+\*].*|[^\$])$')
class FilterSamplesRegex(unittest.TestCase):