code normalization and optimization (stripping of trailing newline, date parsing, ignoreregex mechanism, etc.)

pull/2638/head
sebres 2020-02-13 21:26:28 +01:00
parent 91eca4fdeb
commit b3644ad413
4 changed files with 111 additions and 122 deletions

View File

@@ -273,7 +273,9 @@ class Fail2banRegex(object):
self._filter.checkFindTime = False
self._filter.checkAllRegex = opts.checkAllRegex and not opts.out
# ignore pending (without ID/IP), added to matches if it hits later (if ID/IP can be retrieved)
self._filter.ignorePending = opts.out;
self._filter.ignorePending = opts.out
# callback to increment ignored RE's by index (during process):
self._filter.onIgnoreRegex = self._onIgnoreRegex
self._backend = 'auto'
def output(self, line):
@@ -435,22 +437,17 @@ class Fail2banRegex(object):
'add%sRegex' % regextype.title())(regex.getFailRegex())
return True
def testIgnoreRegex(self, line):
found = False
try:
ret = self._filter.ignoreLine([(line, "", "")])
if ret is not None:
found = True
regex = self._ignoreregex[ret].inc()
except RegexException as e: # pragma: no cover
output( 'ERROR: %s' % e )
return False
return found
def _onIgnoreRegex(self, idx, ignoreRegex):
self._lineIgnored = True
self._ignoreregex[idx].inc()
def testRegex(self, line, date=None):
orgLineBuffer = self._filter._Filter__lineBuffer
# duplicate line buffer (list can be changed inplace during processLine):
if self._filter.getMaxLines() > 1:
orgLineBuffer = orgLineBuffer[:]
fullBuffer = len(orgLineBuffer) >= self._filter.getMaxLines()
is_ignored = False
is_ignored = self._lineIgnored = False
try:
found = self._filter.processLine(line, date)
lines = []
@@ -469,29 +466,30 @@ class Fail2banRegex(object):
except RegexException as e: # pragma: no cover
output( 'ERROR: %s' % e )
return False
for bufLine in orgLineBuffer[int(fullBuffer):]:
if bufLine not in self._filter._Filter__lineBuffer:
try:
self._line_stats.missed_lines.pop(
self._line_stats.missed_lines.index("".join(bufLine)))
if self._debuggex:
self._line_stats.missed_lines_timeextracted.pop(
self._line_stats.missed_lines_timeextracted.index(
"".join(bufLine[::2])))
except ValueError:
pass
# if buffering - add also another lines from match:
if self._print_all_matched:
if not self._debuggex:
self._line_stats.matched_lines.append("".join(bufLine))
else:
lines.append(bufLine[0] + bufLine[2])
self._line_stats.matched += 1
self._line_stats.missed -= 1
if self._filter.getMaxLines() > 1:
for bufLine in orgLineBuffer[int(fullBuffer):]:
if bufLine not in self._filter._Filter__lineBuffer:
try:
self._line_stats.missed_lines.pop(
self._line_stats.missed_lines.index("".join(bufLine)))
if self._debuggex:
self._line_stats.missed_lines_timeextracted.pop(
self._line_stats.missed_lines_timeextracted.index(
"".join(bufLine[::2])))
except ValueError:
pass
# if buffering - add also another lines from match:
if self._print_all_matched:
if not self._debuggex:
self._line_stats.matched_lines.append("".join(bufLine))
else:
lines.append(bufLine[0] + bufLine[2])
self._line_stats.matched += 1
self._line_stats.missed -= 1
if lines: # pre-lines parsed in multiline mode (buffering)
lines.append(self._filter.processedLine())
line = "\n".join(lines)
return line, ret, is_ignored
return line, ret, (is_ignored or self._lineIgnored)
def _prepaireOutput(self):
"""Prepares output- and fetch-function corresponding given '--out' option (format)"""
@@ -558,8 +556,7 @@ class Fail2banRegex(object):
out = self._prepaireOutput()
for line in test_lines:
if isinstance(line, tuple):
line_datetimestripped, ret, is_ignored = self.testRegex(
line[0], line[1])
line_datetimestripped, ret, is_ignored = self.testRegex(line[0], line[1])
line = "".join(line[0])
else:
line = line.rstrip('\r\n')
@@ -567,11 +564,9 @@ class Fail2banRegex(object):
# skip comment and empty lines
continue
line_datetimestripped, ret, is_ignored = self.testRegex(line)
if not is_ignored:
is_ignored = self.testIgnoreRegex(line_datetimestripped)
if self._opts.out: # (formated) output:
if len(ret) > 0: out(ret)
if len(ret) > 0 and not is_ignored: out(ret)
continue
if is_ignored:
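Note on the fail2ban-regex changes above: instead of re-testing each line against ignoreregex afterwards (the removed testIgnoreRegex), the tool now registers an onIgnoreRegex callback that the filter invokes with the index of the matching ignoreregex. The sketch below illustrates only that callback contract; MiniFilter and MiniTester are simplified stand-ins, not the real fail2ban classes.

import re

class MiniFilter:
    """Simplified stand-in for fail2ban's Filter: calls an optional
    onIgnoreRegex callback with (index, regex) whenever an ignoreregex
    matches, instead of the caller re-testing the line afterwards."""
    def __init__(self, ignore_patterns):
        self.ignore_regex = [re.compile(p) for p in ignore_patterns]
        self.onIgnoreRegex = None        # callback: (idx, regex) -> None

    def process_line(self, line):
        for idx, rx in enumerate(self.ignore_regex):
            if rx.search(line):
                if self.onIgnoreRegex:
                    self.onIgnoreRegex(idx, rx)
                return None              # ignored: no failure reported
        return line                      # a real filter would run failregex here

class MiniTester:
    """Counts ignoreregex hits per index, as fail2ban-regex now does via
    self._ignoreregex[idx].inc() inside its _onIgnoreRegex callback."""
    def __init__(self, flt):
        self.hits = [0] * len(flt.ignore_regex)
        self.line_ignored = False
        flt.onIgnoreRegex = self._on_ignore

    def _on_ignore(self, idx, rx):
        self.line_ignored = True
        self.hits[idx] += 1

flt = MiniFilter([r"from 192\.0\.2\.1$"])
tst = MiniTester(flt)
flt.process_line("Failed password for root from 192.0.2.1")
print(tst.hits)   # [1]: ignoreregex #0 matched once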

View File

@@ -107,6 +107,8 @@ class Filter(JailThread):
self.checkAllRegex = False
## avoid finding of pending failures (without ID/IP, used in fail2ban-regex):
self.ignorePending = True
## callback called on ignoreregex match:
self.onIgnoreRegex = None
## if true ignores obsolete failures (failure time < now - findTime):
self.checkFindTime = True
## Ticks counter
@@ -170,7 +172,7 @@ class Filter(JailThread):
# @param value the regular expression
def addFailRegex(self, value):
multiLine = self.getMaxLines() > 1
multiLine = self.__lineBufferSize > 1
try:
regex = FailRegex(value, prefRegex=self.__prefRegex, multiline=multiLine,
useDns=self.__useDns)
@@ -575,20 +577,33 @@ class Filter(JailThread):
"""
if date:
tupleLine = line
self.__lastTimeText = tupleLine[1]
self.__lastDate = date
else:
l = line.rstrip('\r\n')
logSys.log(7, "Working on line %r", line)
(timeMatch, template) = self.dateDetector.matchTime(l)
if timeMatch:
tupleLine = (
l[:timeMatch.start(1)],
l[timeMatch.start(1):timeMatch.end(1)],
l[timeMatch.end(1):],
(timeMatch, template)
)
# try to parse date:
timeMatch = self.dateDetector.matchTime(line)
m = timeMatch[0]
if m:
s = m.start(1)
e = m.end(1)
m = line[s:e]
tupleLine = (line[:s], m, line[e:])
if m: # found and not empty - retrieve date:
date = self.dateDetector.getTime(m, timeMatch)
if date is None:
if m: logSys.error("findFailure failed to parse timeText: %s", m)
date = self.__lastDate
else:
# Lets get the time part
date = date[0]
self.__lastTimeText = m
self.__lastDate = date
else:
tupleLine = (l, "", "", None)
tupleLine = (line, self.__lastTimeText, "")
date = self.__lastDate
# save last line (lazy convert of process line tuple to string on demand):
self.processedLine = lambda: "".join(tupleLine[::2])
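The reworked processLine above parses the timestamp once from the raw line, slices it into a (prefix, timeText, rest) tuple, and falls back to the last seen timestamp and date when a line carries none. A rough sketch of that flow, assuming a toy date detector in place of fail2ban's DateDetector (only the matchTime/getTime call shapes are taken from the diff):

import calendar
import re
import time

class ToyDateDetector:
    """Stand-in for DateDetector: matchTime() returns (match, template),
    getTime() returns a tuple starting with the epoch time, or None."""
    _rx = re.compile(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})")

    def matchTime(self, line):
        return (self._rx.search(line), None)

    def getTime(self, text, timeMatch=None):
        try:
            return (calendar.timegm(time.strptime(text, "%Y-%m-%d %H:%M:%S")),)
        except ValueError:
            return None

def split_and_date(line, dd, last=("", None)):
    """Split the raw line around its timestamp; reuse the cached
    (lastTimeText, lastDate) when the line has no timestamp of its own."""
    lastTimeText, lastDate = last
    m, template = dd.matchTime(line)
    if m:
        s, e = m.start(1), m.end(1)
        timeText = line[s:e]
        tupleLine = (line[:s], timeText, line[e:])
        parsed = dd.getTime(timeText, (m, template))
        date = parsed[0] if parsed is not None else lastDate
        return tupleLine, date, (timeText, date)
    # no timestamp (e.g. a continuation line): reuse the previous one
    return (line, lastTimeText, ""), lastDate, last

t, d, last = split_and_date("2020-02-13 21:26:28 sshd[1]: Failed password", ToyDateDetector())
print(t, d)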
@@ -630,20 +645,26 @@ class Filter(JailThread):
self._errors //= 2
self.idle = True
##
# Returns true if the line should be ignored.
#
# Uses ignoreregex.
# @param line: the line
# @return: a boolean
def ignoreLine(self, tupleLines):
buf = Regex._tupleLinesBuf(tupleLines)
def _ignoreLine(self, buf, orgBuffer, failRegex=None):
# if multi-line buffer - use matched only, otherwise (single line) - original buf:
if failRegex and self.__lineBufferSize > 1:
orgBuffer = failRegex.getMatchedTupleLines()
buf = Regex._tupleLinesBuf(orgBuffer)
# search ignored:
fnd = None
for ignoreRegexIndex, ignoreRegex in enumerate(self.__ignoreRegex):
ignoreRegex.search(buf, tupleLines)
ignoreRegex.search(buf, orgBuffer)
if ignoreRegex.hasMatched():
return ignoreRegexIndex
return None
fnd = ignoreRegexIndex
logSys.log(7, " Matched ignoreregex %d and was ignored", fnd)
if self.onIgnoreRegex: self.onIgnoreRegex(fnd, ignoreRegex)
# remove ignored match:
if not self.checkAllRegex or self.__lineBufferSize > 1:
# todo: check ignoreRegex.getUnmatchedTupleLines() would be better (fix testGetFailuresMultiLineIgnoreRegex):
if failRegex:
self.__lineBuffer = failRegex.getUnmatchedTupleLines()
if not self.checkAllRegex: break
return fnd
def _updateUsers(self, fail, user=()):
users = fail.get('users')
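In multi-line (buffered) mode the new _ignoreLine, when called after a failregex match, checks only the lines that match actually consumed and, on an ignore hit, drops them from the line buffer. A simplified illustration of that idea; the helper below and its names are invented for the example, only the behaviour mirrors the diff:

import re

def check_ignore_on_match(line_buffer, matched_idx, ignore_patterns):
    """line_buffer: list of (prefix, timeText, rest) tuples;
    matched_idx: indices of the buffer entries the failregex consumed.
    Returns (ignored, pruned_buffer)."""
    matched = [line_buffer[i] for i in matched_idx]
    # flatten the matched tuples (prefix + rest) into one searchable text
    buf = "\n".join(t[0] + t[2] for t in matched)
    for rx in ignore_patterns:
        if re.search(rx, buf):
            # ignored: keep only the lines the failregex did NOT consume
            pruned = [t for i, t in enumerate(line_buffer) if i not in set(matched_idx)]
            return True, pruned
    return False, line_buffer

buf = [("", "", "Connection from 192.0.2.1"), ("", "", "Failed password for root")]
print(check_ignore_on_match(buf, [0, 1], [r"192\.0\.2\.1"]))   # (True, [])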
@@ -713,7 +734,7 @@ class Filter(JailThread):
# to find the logging time.
# @return a dict with IP and timestamp.
def findFailure(self, tupleLine, date=None):
def findFailure(self, tupleLine, date):
failList = list()
ll = logSys.getEffectiveLevel()
@@ -723,62 +744,38 @@ class Filter(JailThread):
returnRawHost = True
cidr = IPAddr.CIDR_RAW
# Checks if we must ignore this line.
if self.ignoreLine([tupleLine[::2]]) is not None:
# The ignoreregex matched. Return.
if ll <= 7: logSys.log(7, "Matched ignoreregex and was \"%s\" ignored",
"".join(tupleLine[::2]))
return failList
timeText = tupleLine[1]
if date:
self.__lastTimeText = timeText
self.__lastDate = date
elif timeText:
dateTimeMatch = self.dateDetector.getTime(timeText, tupleLine[3])
if dateTimeMatch is None:
logSys.error("findFailure failed to parse timeText: %s", timeText)
date = self.__lastDate
else:
# Lets get the time part
date = dateTimeMatch[0]
self.__lastTimeText = timeText
self.__lastDate = date
else:
timeText = self.__lastTimeText or "".join(tupleLine[::2])
date = self.__lastDate
if self.checkFindTime and date is not None and date < MyTime.time() - self.getFindTime():
if ll <= 5: logSys.log(5, "Ignore line since time %s < %s - %s",
date, MyTime.time(), self.getFindTime())
return failList
if self.__lineBufferSize > 1:
orgBuffer = self.__lineBuffer = (
self.__lineBuffer + [tupleLine[:3]])[-self.__lineBufferSize:]
self.__lineBuffer.append(tupleLine)
orgBuffer = self.__lineBuffer = self.__lineBuffer[-self.__lineBufferSize:]
else:
orgBuffer = self.__lineBuffer = [tupleLine[:3]]
if ll <= 5: logSys.log(5, "Looking for match of %r", self.__lineBuffer)
buf = Regex._tupleLinesBuf(self.__lineBuffer)
orgBuffer = self.__lineBuffer = [tupleLine]
if ll <= 5: logSys.log(5, "Looking for match of %r", orgBuffer)
buf = Regex._tupleLinesBuf(orgBuffer)
# Checks if we must ignore this line (only if fewer ignoreregex than failregex).
if self.__ignoreRegex and len(self.__ignoreRegex) < len(self.__failRegex) - 2:
if self._ignoreLine(buf, orgBuffer) is not None:
# The ignoreregex matched. Return.
return failList
# Pre-filter fail regex (if available):
preGroups = {}
if self.__prefRegex:
if ll <= 5: logSys.log(5, " Looking for prefregex %r", self.__prefRegex.getRegex())
self.__prefRegex.search(buf, self.__lineBuffer)
self.__prefRegex.search(buf, orgBuffer)
if not self.__prefRegex.hasMatched():
if ll <= 5: logSys.log(5, " Prefregex not matched")
return failList
preGroups = self.__prefRegex.getGroups()
if ll <= 7: logSys.log(7, " Pre-filter matched %s", preGroups)
repl = preGroups.get('content')
repl = preGroups.pop('content', None)
# Content replacement:
if repl:
del preGroups['content']
self.__lineBuffer, buf = [('', '', repl)], None
# Iterates over all the regular expressions.
@@ -796,15 +793,12 @@ class Filter(JailThread):
# The failregex matched.
if ll <= 7: logSys.log(7, " Matched failregex %d: %s", failRegexIndex, fail)
# Checks if we must ignore this match.
if self.ignoreLine(failRegex.getMatchedTupleLines()) \
is not None:
if self.__ignoreRegex and self._ignoreLine(buf, orgBuffer, failRegex) is not None:
# The ignoreregex matched. Remove ignored match.
self.__lineBuffer, buf = failRegex.getUnmatchedTupleLines(), None
if ll <= 7: logSys.log(7, " Matched ignoreregex and was ignored")
buf = None
if not self.checkAllRegex:
break
else:
continue
continue
if date is None:
logSys.warning(
"Found a match for %r but no valid date/time "
@@ -814,10 +808,10 @@ class Filter(JailThread):
"file a detailed issue on"
" https://github.com/fail2ban/fail2ban/issues "
"in order to get support for this format.",
"\n".join(failRegex.getMatchedLines()), timeText)
"\n".join(failRegex.getMatchedLines()), tupleLine[1])
continue
# we should check all regex (bypass on multi-line, otherwise too complex):
if not self.checkAllRegex or self.getMaxLines() > 1:
if not self.checkAllRegex or self.__lineBufferSize > 1:
self.__lineBuffer, buf = failRegex.getUnmatchedTupleLines(), None
# merge data if multi-line failure:
raw = returnRawHost
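findFailure now runs the ignoreregex check up front only when there are clearly fewer ignoreregex than failregex; otherwise the ignore check is deferred until a failregex has actually matched. A small sketch of that ordering decision; the threshold copies the len(ignore) < len(fail) - 2 test from the diff, everything else is simplified:

import re

def find_failure(line, fail_patterns, ignore_patterns):
    """Return the first matching failregex pattern, or None if nothing
    matched or the line was ignored."""
    # the cheap pre-check only pays off when ignoreregex are few compared to failregex
    if ignore_patterns and len(ignore_patterns) < len(fail_patterns) - 2:
        if any(re.search(rx, line) for rx in ignore_patterns):
            return None                  # ignored before trying any failregex
        ignore_patterns = []             # already checked, skip the per-match check
    for rx in fail_patterns:
        if re.search(rx, line):
            # otherwise the ignore check runs only for lines that actually matched
            if any(re.search(irx, line) for irx in ignore_patterns):
                return None
            return rx
    return None

fails = [r"Failed password for \S+ from \S+"] * 5
ignores = [r"from 192\.0\.2\.1$"]
print(find_failure("Failed password for root from 192.0.2.1", fails, ignores))   # None (ignored)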
@@ -1056,7 +1050,7 @@ class FileFilter(Filter):
if not line or not self.active:
# The jail reached the bottom or has been stopped
break
self.processLineAndAdd(line)
self.processLineAndAdd(line.rstrip('\r\n'))
finally:
log.close()
db = self.jail.database

View File

@@ -63,10 +63,7 @@ def open(*args):
if len(args) == 2:
# ~50kB buffer should be sufficient for all tests here.
args = args + (50000,)
if sys.version_info >= (3,):
return fopen(*args, **{'encoding': 'utf-8', 'errors': 'ignore'})
else:
return fopen(*args)
return fopen(*args)
def _killfile(f, name):
@@ -200,7 +197,7 @@ def _copy_lines_between_files(in_, fout, n=None, skip=0, mode='a', terminal_line
# polling filter could detect the change
mtimesleep()
if isinstance(in_, str): # pragma: no branch - only used with str in test cases
fin = open(in_, 'r')
fin = open(in_, 'rb')
else:
fin = in_
# Skip
@@ -210,7 +207,7 @@ def _copy_lines_between_files(in_, fout, n=None, skip=0, mode='a', terminal_line
i = 0
lines = []
while n is None or i < n:
l = fin.readline()
l = FileContainer.decode_line(in_, 'UTF-8', fin.readline()).rstrip('\r\n')
if terminal_line is not None and l == terminal_line:
break
lines.append(l)
@@ -238,7 +235,7 @@ def _copy_lines_to_journal(in_, fields={},n=None, skip=0, terminal_line=""): # p
Returns None
"""
if isinstance(in_, str): # pragma: no branch - only used with str in test cases
fin = open(in_, 'r')
fin = open(in_, 'rb')
else:
fin = in_
# Required for filtering
@@ -249,7 +246,7 @@ def _copy_lines_to_journal(in_, fields={},n=None, skip=0, terminal_line=""): # p
# Read/Write
i = 0
while n is None or i < n:
l = fin.readline()
l = FileContainer.decode_line(in_, 'UTF-8', fin.readline()).rstrip('\r\n')
if terminal_line is not None and l == terminal_line:
break
journal.send(MESSAGE=l.strip(), **fields)
@@ -1583,9 +1580,9 @@ class GetFailures(LogCaptureTestCase):
# We first adjust logfile/failures to end with CR+LF
fname = tempfile.mktemp(prefix='tmp_fail2ban', suffix='crlf')
# poor man unix2dos:
fin, fout = open(GetFailures.FILENAME_01), open(fname, 'w')
for l in fin.readlines():
fout.write('%s\r\n' % l.rstrip('\n'))
fin, fout = open(GetFailures.FILENAME_01, 'rb'), open(fname, 'wb')
for l in fin.read().splitlines():
fout.write(l + b'\r\n')
fin.close()
fout.close()
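The test helpers above now open fixture files in binary mode and decode each line explicitly; the diff shows FileContainer.decode_line(filename, 'UTF-8', rawline) used for that. A self-contained approximation of the pattern, with decode_line replaced by a local stand-in because only its call shape appears here:

import io

def decode_line(filename, enc, rawline):
    """Local stand-in for FileContainer.decode_line: decode bytes from a
    binary read, falling back to replacement characters so one bad byte
    cannot abort a whole test run."""
    if isinstance(rawline, bytes):
        try:
            return rawline.decode(enc)
        except UnicodeDecodeError:
            return rawline.decode(enc, 'replace')
    return rawline

# simulated binary log fixture (CRLF line endings on purpose)
fin = io.BytesIO(b"Feb 13 21:26:28 srv sshd[1]: Failed password for root\r\n")
for raw in fin:
    line = decode_line("<memory>", "UTF-8", raw).rstrip("\r\n")
    print(repr(line))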

View File

@@ -32,7 +32,7 @@ import sys
import time
import unittest
from ..server.failregex import Regex
from ..server.filter import Filter
from ..server.filter import Filter, FileContainer
from ..client.filterreader import FilterReader
from .utils import setUpMyTime, tearDownMyTime, TEST_NOW, CONFIG_DIR
@@ -157,10 +157,11 @@ def testSampleRegexsFactory(name, basedir):
while i < len(filenames):
filename = filenames[i]; i += 1;
logFile = fileinput.FileInput(os.path.join(TEST_FILES_DIR, "logs",
filename))
filename), mode='rb')
ignoreBlock = False
for line in logFile:
line = FileContainer.decode_line(logFile.filename(), 'UTF-8', line)
jsonREMatch = re.match("^#+ ?(failJSON|(?:file|filter)Options|addFILE):(.+)$", line)
if jsonREMatch:
try:
@@ -202,6 +203,7 @@ def testSampleRegexsFactory(name, basedir):
raise ValueError("%s: %s:%i" %
(e, logFile.filename(), logFile.filelineno()))
line = next(logFile)
line = FileContainer.decode_line(logFile.filename(), 'UTF-8', line)
elif ignoreBlock or line.startswith("#") or not line.strip():
continue
else: # pragma: no cover - normally unreachable
@@ -214,6 +216,7 @@ def testSampleRegexsFactory(name, basedir):
flt = self._readFilter(fltName, name, basedir, opts=None)
self._filterTests = [(fltName, flt, {})]
line = line.rstrip('\r\n')
# process line using several filter options (if specified in the test-file):
for fltName, flt, opts in self._filterTests:
# Bypass if constraint (as expression) is not valid:
@@ -230,7 +233,7 @@ def testSampleRegexsFactory(name, basedir):
else: # simulate journal processing, time is known from journal (formatJournalEntry):
if opts.get('test.prefix-line'): # journal backends creates common prefix-line:
line = opts.get('test.prefix-line') + line
ret = flt.processLine(('', TEST_NOW_STR, line.rstrip('\r\n')), TEST_NOW)
ret = flt.processLine(('', TEST_NOW_STR, line), TEST_NOW)
if ret:
# filter matched only (in checkAllRegex mode it could return 'nofail' too):
found = []
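The sample tests above now hand Filter.processLine an already-split ('', timeText, message) tuple together with an explicit timestamp, so no date detection has to run on journal-style entries. A minimal illustration of that call shape; TEST_NOW and TEST_NOW_STR below are placeholder values standing in for the real test constants:

TEST_NOW = 1581625588.0                      # placeholder epoch value
TEST_NOW_STR = "2020-02-13 21:26:28"         # placeholder text form

def journal_entry(line, prefix_line=""):
    """Build the (tupleLine, date) pair the sample tests pass to
    flt.processLine(tupleLine, date) when simulating journal input."""
    return ('', TEST_NOW_STR, prefix_line + line), TEST_NOW

tupleLine, date = journal_entry("sshd[1]: Failed password for root from 192.0.2.1")
# with a real filter instance: ret = flt.processLine(tupleLine, date)
print(tupleLine, date)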