Code normalization and optimization (stripping of trailing newlines, date parsing, ignoreregex mechanism, etc.)

2020-02-13 21:26:28 +01:00 · 2020-02-13 21:26:28 +01:00 · b3644ad413
parent 91eca4fdeb
commit b3644ad413
4 changed files with 111 additions and 122 deletions
--- a/fail2ban/client/fail2banregex.py
+++ b/fail2ban/client/fail2banregex.py
@ -273,7 +273,9 @@ class Fail2banRegex(object):
 		self._filter.checkFindTime = False
 		self._filter.checkAllRegex = opts.checkAllRegex and not opts.out
 		# ignore pending (without ID/IP), added to matches if it hits later (if ID/IP can be retrieved)
-		self._filter.ignorePending = opts.out; 
+		self._filter.ignorePending = opts.out
 		# callback to increment ignored RE's by index (during process):
 		self._filter.onIgnoreRegex = self._onIgnoreRegex
 		self._backend = 'auto'
 	def output(self, line):
@ -435,22 +437,17 @@ class Fail2banRegex(object):
 					'add%sRegex' % regextype.title())(regex.getFailRegex())
 		return True
-	def testIgnoreRegex(self, line):
+	def _onIgnoreRegex(self, idx, ignoreRegex):
-		found = False
+		self._lineIgnored = True
-		try:
+		self._ignoreregex[idx].inc()
 			ret = self._filter.ignoreLine([(line, "", "")])
 			if ret is not None:
 				found = True
 				regex = self._ignoreregex[ret].inc()
 		except RegexException as e: # pragma: no cover
 			output( 'ERROR: %s' % e )
 			return False
 		return found
 	def testRegex(self, line, date=None):
 		orgLineBuffer = self._filter._Filter__lineBuffer
 		# duplicate line buffer (list can be changed inplace during processLine):
 		if self._filter.getMaxLines() > 1:
 			orgLineBuffer = orgLineBuffer[:]
 		fullBuffer = len(orgLineBuffer) >= self._filter.getMaxLines()
-		is_ignored = False
+		is_ignored = self._lineIgnored = False
 		try:
 			found = self._filter.processLine(line, date)
 			lines = []
@ -469,6 +466,7 @@ class Fail2banRegex(object):
 		except RegexException as e: # pragma: no cover
 			output( 'ERROR: %s' % e )
 			return False
 		if self._filter.getMaxLines() > 1:
 			for bufLine in orgLineBuffer[int(fullBuffer):]:
 				if bufLine not in self._filter._Filter__lineBuffer:
 					try:
@ -491,7 +489,7 @@ class Fail2banRegex(object):
 		if lines: # pre-lines parsed in multiline mode (buffering)
 			lines.append(self._filter.processedLine())
 			line = "\n".join(lines)
-		return line, ret, is_ignored
+		return line, ret, (is_ignored or self._lineIgnored)
 	def _prepaireOutput(self):
 		"""Prepares output- and fetch-function corresponding given '--out' option (format)"""
@ -558,8 +556,7 @@ class Fail2banRegex(object):
 			out = self._prepaireOutput()
 		for line in test_lines:
 			if isinstance(line, tuple):
-				line_datetimestripped, ret, is_ignored = self.testRegex(
+				line_datetimestripped, ret, is_ignored = self.testRegex(line[0], line[1])
 					line[0], line[1])
 				line = "".join(line[0])
 			else:
 				line = line.rstrip('\r\n')
@ -567,11 +564,9 @@ class Fail2banRegex(object):
 					# skip comment and empty lines
 					continue
 				line_datetimestripped, ret, is_ignored = self.testRegex(line)
 			if not is_ignored:
 				is_ignored = self.testIgnoreRegex(line_datetimestripped)
 			if self._opts.out: # (formated) output:
-				if len(ret) > 0: out(ret)
+				if len(ret) > 0 and not is_ignored: out(ret)
 				continue
 			if is_ignored:
--- a/fail2ban/server/filter.py
+++ b/fail2ban/server/filter.py
@ -107,6 +107,8 @@ class Filter(JailThread):
 		self.checkAllRegex = False
 		## avoid finding of pending failures (without ID/IP, used in fail2ban-regex):
 		self.ignorePending = True
 		## callback called on ignoreregex match :
 		self.onIgnoreRegex = None
 		## if true ignores obsolete failures (failure time < now - findTime):
 		self.checkFindTime = True
 		## Ticks counter
@ -170,7 +172,7 @@ class Filter(JailThread):
 	# @param value the regular expression
 	def addFailRegex(self, value):
-		multiLine = self.getMaxLines() > 1
+		multiLine = self.__lineBufferSize > 1
 		try:
 			regex = FailRegex(value, prefRegex=self.__prefRegex, multiline=multiLine,
 				useDns=self.__useDns)
@ -575,20 +577,33 @@ class Filter(JailThread):
 		"""
 		if date:
 			tupleLine = line
 			self.__lastTimeText = tupleLine[1]
 			self.__lastDate = date
 		else:
 			l = line.rstrip('\r\n')
 			logSys.log(7, "Working on line %r", line)
-			(timeMatch, template) = self.dateDetector.matchTime(l)
+			# try to parse date:
-			if timeMatch:
+			timeMatch = self.dateDetector.matchTime(line)
-				tupleLine  = (
+			m = timeMatch[0]
-					l[:timeMatch.start(1)],
+			if m:
-					l[timeMatch.start(1):timeMatch.end(1)],
+				s = m.start(1)
-					l[timeMatch.end(1):],
+				e = m.end(1)
-					(timeMatch, template)
+				m = line[s:e]
-				)
+				tupleLine = (line[:s], m, line[e:])
 				if m: # found and not empty - retrive date:
 					date = self.dateDetector.getTime(m, timeMatch)
 				if date is None:
 					if m: logSys.error("findFailure failed to parse timeText: %s", m)
 					date = self.__lastDate
 				else:
-				tupleLine = (l, "", "", None)
+					# Lets get the time part
 					date = date[0]
 					self.__lastTimeText = m
 					self.__lastDate = date
 			else:
 				tupleLine = (line, self.__lastTimeText, "")
 				date = self.__lastDate
 		# save last line (lazy convert of process line tuple to string on demand):
 		self.processedLine = lambda: "".join(tupleLine[::2])
@ -630,20 +645,26 @@ class Filter(JailThread):
 			self._errors //= 2
 			self.idle = True
-	##
+	def _ignoreLine(self, buf, orgBuffer, failRegex=None):
-	# Returns true if the line should be ignored.
+		# if multi-line buffer - use matched only, otherwise (single line) - original buf:
-	#
+		if failRegex and self.__lineBufferSize > 1:
-	# Uses ignoreregex.
+			orgBuffer = failRegex.getMatchedTupleLines()
-	# @param line: the line
+			buf = Regex._tupleLinesBuf(orgBuffer)
-	# @return: a boolean
+		# search ignored:
-
+		fnd = None
 	def ignoreLine(self, tupleLines):
 		buf = Regex._tupleLinesBuf(tupleLines)
 		for ignoreRegexIndex, ignoreRegex in enumerate(self.__ignoreRegex):
-			ignoreRegex.search(buf, tupleLines)
+			ignoreRegex.search(buf, orgBuffer)
 			if ignoreRegex.hasMatched():
-				return ignoreRegexIndex
+				fnd = ignoreRegexIndex
-		return None
+				logSys.log(7, "  Matched ignoreregex %d and was ignored", fnd)
 				if self.onIgnoreRegex: self.onIgnoreRegex(fnd, ignoreRegex)
 				# remove ignored match:
 				if not self.checkAllRegex or self.__lineBufferSize > 1:
 					# todo: check ignoreRegex.getUnmatchedTupleLines() would be better (fix testGetFailuresMultiLineIgnoreRegex):
 					if failRegex:
 						self.__lineBuffer = failRegex.getUnmatchedTupleLines()
 				if not self.checkAllRegex: break
 		return fnd
 	def _updateUsers(self, fail, user=()):
 		users = fail.get('users')
@ -713,7 +734,7 @@ class Filter(JailThread):
 	# to find the logging time.
 	# @return a dict with IP and timestamp.
-	def findFailure(self, tupleLine, date=None):
+	def findFailure(self, tupleLine, date):
 		failList = list()
 		ll = logSys.getEffectiveLevel()
@ -723,62 +744,38 @@ class Filter(JailThread):
 			returnRawHost = True
 			cidr = IPAddr.CIDR_RAW
 		# Checks if we mut ignore this line.
 		if self.ignoreLine([tupleLine[::2]]) is not None:
 			# The ignoreregex matched. Return.
 			if ll <= 7: logSys.log(7, "Matched ignoreregex and was \"%s\" ignored",
 				"".join(tupleLine[::2]))
 			return failList
 		timeText = tupleLine[1]
 		if date:
 			self.__lastTimeText = timeText
 			self.__lastDate = date
 		elif timeText:
 			dateTimeMatch = self.dateDetector.getTime(timeText, tupleLine[3])
 			if dateTimeMatch is None:
 				logSys.error("findFailure failed to parse timeText: %s", timeText)
 				date = self.__lastDate
 			else:
 				# Lets get the time part
 				date = dateTimeMatch[0]
 				self.__lastTimeText = timeText
 				self.__lastDate = date
 		else:
 			timeText = self.__lastTimeText or "".join(tupleLine[::2])
 			date = self.__lastDate
 		if self.checkFindTime and date is not None and date < MyTime.time() - self.getFindTime():
 			if ll <= 5: logSys.log(5, "Ignore line since time %s < %s - %s", 
 				date, MyTime.time(), self.getFindTime())
 			return failList
 		if self.__lineBufferSize > 1:
-			orgBuffer = self.__lineBuffer = (
+			self.__lineBuffer.append(tupleLine)
-				self.__lineBuffer + [tupleLine[:3]])[-self.__lineBufferSize:]
+			orgBuffer = self.__lineBuffer = self.__lineBuffer[-self.__lineBufferSize:]
 		else:
-			orgBuffer = self.__lineBuffer = [tupleLine[:3]]
+			orgBuffer = self.__lineBuffer = [tupleLine]
-		if ll <= 5: logSys.log(5, "Looking for match of %r", self.__lineBuffer)
+		if ll <= 5: logSys.log(5, "Looking for match of %r", orgBuffer)
-		buf = Regex._tupleLinesBuf(self.__lineBuffer)
+		buf = Regex._tupleLinesBuf(orgBuffer)
 		# Checks if we must ignore this line (only if fewer ignoreregex than failregex).
 		if self.__ignoreRegex and len(self.__ignoreRegex) < len(self.__failRegex) - 2:
 			if self._ignoreLine(buf, orgBuffer) is not None:
 				# The ignoreregex matched. Return.
 				return failList
 		# Pre-filter fail regex (if available):
 		preGroups = {}
 		if self.__prefRegex:
 			if ll <= 5: logSys.log(5, "  Looking for prefregex %r", self.__prefRegex.getRegex())
-			self.__prefRegex.search(buf, self.__lineBuffer)
+			self.__prefRegex.search(buf, orgBuffer)
 			if not self.__prefRegex.hasMatched():
 				if ll <= 5: logSys.log(5, "  Prefregex not matched")
 				return failList
 			preGroups = self.__prefRegex.getGroups()
 			if ll <= 7: logSys.log(7, "  Pre-filter matched %s", preGroups)
-			repl = preGroups.get('content')
+			repl = preGroups.pop('content', None)
 			# Content replacement:
 			if repl:
 				del preGroups['content']
 				self.__lineBuffer, buf = [('', '', repl)], None
 		# Iterates over all the regular expressions.
@ -796,14 +793,11 @@ class Filter(JailThread):
 				# The failregex matched.
 				if ll <= 7: logSys.log(7, "  Matched failregex %d: %s", failRegexIndex, fail)
 				# Checks if we must ignore this match.
-				if self.ignoreLine(failRegex.getMatchedTupleLines()) \
+				if self.__ignoreRegex and self._ignoreLine(buf, orgBuffer, failRegex) is not None:
 						is not None:
 					# The ignoreregex matched. Remove ignored match.
-					self.__lineBuffer, buf = failRegex.getUnmatchedTupleLines(), None
+					buf = None
 					if ll <= 7: logSys.log(7, "  Matched ignoreregex and was ignored")
 					if not self.checkAllRegex:
 						break
 					else:
 					continue
 				if date is None:
 					logSys.warning(
@ -814,10 +808,10 @@ class Filter(JailThread):
 						"file a detailed issue on"
 						" https://github.com/fail2ban/fail2ban/issues "
 						"in order to get support for this format.",
-						 "\n".join(failRegex.getMatchedLines()), timeText)
+						 "\n".join(failRegex.getMatchedLines()), tupleLine[1])
 					continue
 				# we should check all regex (bypass on multi-line, otherwise too complex):
-				if not self.checkAllRegex or self.getMaxLines() > 1:
+				if not self.checkAllRegex or self.__lineBufferSize > 1:
 					self.__lineBuffer, buf = failRegex.getUnmatchedTupleLines(), None
 				# merge data if multi-line failure:
 				raw = returnRawHost
@ -1056,7 +1050,7 @@ class FileFilter(Filter):
 					if not line or not self.active:
 						# The jail reached the bottom or has been stopped
 						break
-					self.processLineAndAdd(line)
+					self.processLineAndAdd(line.rstrip('\r\n'))
 		finally:
 			log.close()
 		db = self.jail.database
--- a/fail2ban/tests/filtertestcase.py
+++ b/fail2ban/tests/filtertestcase.py
@ -63,9 +63,6 @@ def open(*args):
 	if len(args) == 2:
 		# ~50kB buffer should be sufficient for all tests here.
 		args = args + (50000,)
 	if sys.version_info >= (3,):
 		return fopen(*args, **{'encoding': 'utf-8', 'errors': 'ignore'})
 	else:
 	return fopen(*args)
@ -200,7 +197,7 @@ def _copy_lines_between_files(in_, fout, n=None, skip=0, mode='a', terminal_line
 	# polling filter could detect the change
 	mtimesleep()
 	if isinstance(in_, str): # pragma: no branch - only used with str in test cases
-		fin = open(in_, 'r')
+		fin = open(in_, 'rb')
 	else:
 		fin = in_
 	# Skip
@ -210,7 +207,7 @@ def _copy_lines_between_files(in_, fout, n=None, skip=0, mode='a', terminal_line
 	i = 0
 	lines = []
 	while n is None or i < n:
-		l = fin.readline()
+		l = FileContainer.decode_line(in_, 'UTF-8', fin.readline()).rstrip('\r\n')
 		if terminal_line is not None and l == terminal_line:
 			break
 		lines.append(l)
@ -238,7 +235,7 @@ def _copy_lines_to_journal(in_, fields={},n=None, skip=0, terminal_line=""): # p
 	Returns None
 	"""
 	if isinstance(in_, str): # pragma: no branch - only used with str in test cases
-		fin = open(in_, 'r')
+		fin = open(in_, 'rb')
 	else:
 		fin = in_
 	# Required for filtering
@ -249,7 +246,7 @@ def _copy_lines_to_journal(in_, fields={},n=None, skip=0, terminal_line=""): # p
 	# Read/Write
 	i = 0
 	while n is None or i < n:
-		l = fin.readline()
+		l = FileContainer.decode_line(in_, 'UTF-8', fin.readline()).rstrip('\r\n')
 		if terminal_line is not None and l == terminal_line:
 			break
 		journal.send(MESSAGE=l.strip(), **fields)
@ -1583,9 +1580,9 @@ class GetFailures(LogCaptureTestCase):
 		# We first adjust logfile/failures to end with CR+LF
 		fname = tempfile.mktemp(prefix='tmp_fail2ban', suffix='crlf')
 		# poor man unix2dos:
-		fin, fout = open(GetFailures.FILENAME_01), open(fname, 'w')
+		fin, fout = open(GetFailures.FILENAME_01, 'rb'), open(fname, 'wb')
-		for l in fin.readlines():
+		for l in fin.read().splitlines():
-			fout.write('%s\r\n' % l.rstrip('\n'))
+			fout.write(l + b'\r\n')
 		fin.close()
 		fout.close()
--- a/fail2ban/tests/samplestestcase.py
+++ b/fail2ban/tests/samplestestcase.py
@ -32,7 +32,7 @@ import sys
 import time
 import unittest
 from ..server.failregex import Regex
-from ..server.filter import Filter
+from ..server.filter import Filter, FileContainer
 from ..client.filterreader import FilterReader
 from .utils import setUpMyTime, tearDownMyTime, TEST_NOW, CONFIG_DIR
@ -157,10 +157,11 @@ def testSampleRegexsFactory(name, basedir):
 		while i < len(filenames):
 			filename = filenames[i]; i += 1;
 			logFile = fileinput.FileInput(os.path.join(TEST_FILES_DIR, "logs",
-				filename))
+				filename), mode='rb')
 			ignoreBlock = False
 			for line in logFile:
 				line = FileContainer.decode_line(logFile.filename(), 'UTF-8', line)
 				jsonREMatch = re.match("^#+ ?(failJSON|(?:file|filter)Options|addFILE):(.+)$", line)
 				if jsonREMatch:
 					try:
@ -202,6 +203,7 @@ def testSampleRegexsFactory(name, basedir):
 						raise ValueError("%s: %s:%i" %
 							(e, logFile.filename(), logFile.filelineno()))
 					line = next(logFile)
 					line = FileContainer.decode_line(logFile.filename(), 'UTF-8', line)
 				elif ignoreBlock or line.startswith("#") or not line.strip():
 					continue
 				else: # pragma: no cover - normally unreachable
@ -214,6 +216,7 @@ def testSampleRegexsFactory(name, basedir):
 					flt = self._readFilter(fltName, name, basedir, opts=None)
 					self._filterTests = [(fltName, flt, {})]
 				line = line.rstrip('\r\n')
 				# process line using several filter options (if specified in the test-file):
 				for fltName, flt, opts in self._filterTests:
 					# Bypass if constraint (as expression) is not valid:
@ -230,7 +233,7 @@ def testSampleRegexsFactory(name, basedir):
 						else: # simulate journal processing, time is known from journal (formatJournalEntry):
 							if opts.get('test.prefix-line'): # journal backends creates common prefix-line:
 								line = opts.get('test.prefix-line') + line
-							ret = flt.processLine(('', TEST_NOW_STR, line.rstrip('\r\n')), TEST_NOW)
+							ret = flt.processLine(('', TEST_NOW_STR, line), TEST_NOW)
 						if ret:
 							# filter matched only (in checkAllRegex mode it could return 'nofail' too):
 							found = []