Normalizes replacement of `<SKIPLINES>` (moved to `_resolveHostTag`, so it is now replaced together with the other tags);

Regex will be compiled as MULTILINE only if needed (buffering with `maxlines` > 1), which: - improves performance of single-line parsing; - makes the regex more precise, because the anchors `^`/`$` (begin/end of string) are distinguished from the new-line character '\n', e.g. for entries coming from filters (like systemd journal) that allow parsing of log entries containing new-line chars (as a single entry);
2017-03-23 22:02:37 +01:00 · 2017-03-23 22:02:37 +01:00 · 61c1bdfe79
parent b650503f00
commit 61c1bdfe79
3 changed files with 25 additions and 11 deletions
--- a/fail2ban/server/failregex.py
+++ b/fail2ban/server/failregex.py
@ -103,20 +103,17 @@ class Regex:
 	# avoid construction of invalid object.
 	# @param value the regular expression
 	
-	def __init__(self, regex, **kwargs):
+	def __init__(self, regex, multiline=False, **kwargs):
 		self._matchCache = None
 		# Perform shortcuts expansions.
-		# Resolve "<HOST>" tag using default regular expression for host:
+		# Replace standard f2b-tags (like "<HOST>", etc) using default regular expressions:
 		regex = Regex._resolveHostTag(regex, **kwargs)
-		# Replace "<SKIPLINES>" with regular expression for multiple lines.
-		regexSplit = regex.split("<SKIPLINES>")
-		regex = regexSplit[0]
-		for n, regexLine in enumerate(regexSplit[1:]):
-			regex += "\n(?P<skiplines%i>(?:(.*\n)*?))" % n + regexLine
+		#
 		if regex.lstrip() == '':
 			raise RegexException("Cannot add empty regex")
+		flags = re.MULTILINE if (multiline or "\n" in regex or r"\n" in regex) else 0
 		try:
-			self._regexObj = re.compile(regex, re.MULTILINE)
+			self._regexObj = re.compile(regex, flags)
 			self._regex = regex
 		except sre_constants.error:
 			raise RegexException("Unable to compile regular expression '%s'" %
@ -125,6 +122,11 @@ class Regex:
 	def __str__(self):
 		return "%s(%r)" % (self.__class__.__name__, self._regex)

+	@property
+	def flags(self):
+		"""Returns the regex matching flags combination of the compiled regex object"""
+		return self._regexObj.flags
+
 	##
 	# Replaces "<HOST>", "<IP4>", "<IP6>", "<FID>" with default regular expression for host
 	#
@ -135,6 +137,9 @@ class Regex:
 	def _resolveHostTag(regex, useDns="yes"):

 		openTags = dict()
+		props = {
+			'nl': 0, # new lines counter by <SKIPLINES> tag;
+		}
 		# tag interpolation callable:
 		def substTag(m):
 			tag = m.group()
@ -142,6 +147,11 @@ class Regex:
 			# 3 groups instead of <HOST> - separated ipv4, ipv6 and host (dns)
 			if tn == "HOST":
 				return R_HOST[RI_HOST if useDns not in ("no",) else RI_ADDR]
+			# replace "<SKIPLINES>" with regular expression for multiple lines (by buffering with maxlines)
+			if tn == "SKIPLINES":
+				nl = props['nl']
+				props['nl'] = nl + 1
+				return r"\n(?P<skiplines%i>(?:(?:.*\n)*?))" % (nl,)
 			# static replacement from RH4TAG:
 			try:
 				return RH4TAG[tn]
--- a/fail2ban/server/filter.py
+++ b/fail2ban/server/filter.py
@ -161,10 +161,14 @@ class Filter(JailThread):
 	# @param value the regular expression

 	def addFailRegex(self, value):
+		multiLine = self.getMaxLines() > 1
 		try:
-			regex = FailRegex(value, prefRegex=self.__prefRegex, useDns=self.__useDns)
+			regex = FailRegex(value, prefRegex=self.__prefRegex, multiline=multiLine,
+				useDns=self.__useDns)
 			self.__failRegex.append(regex)
-			if "\n" in regex.getRegex() and not self.getMaxLines() > 1:
+			regexExpr = regex.getRegex()
+			# check new lines present in regex (was compiled as multiline), incorrect by `maxlines=1`:
+			if (regex.flags & re.MULTILINE) and not multiLine:
 				logSys.warning(
 					"Mutliline regex set for jail %r "
 					"but maxlines not greater than 1", self.jailName)
--- a/fail2ban/tests/samplestestcase.py
+++ b/fail2ban/tests/samplestestcase.py
@ -41,7 +41,7 @@ TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), "files")

 # regexp to test greedy catch-all should be not-greedy:
 RE_HOST = Regex('<HOST>').getRegex()
-RE_WRONG_GREED = re.compile(r'\.[+\*](?!\?).*' + re.escape(RE_HOST) + r'.*(?:\.[+\*].*|[^\$])$')
+RE_WRONG_GREED = re.compile(r'\.[+\*](?!\?)[^\$\^]*' + re.escape(RE_HOST) + r'.*(?:\.[+\*].*|[^\$])$')


 class FilterSamplesRegex(unittest.TestCase):