Code normalization and optimization (stripping of trailing newlines, date parsing, ignoreregex mechanism, etc.)

pull/2638/head
sebres 2020-02-13 21:26:28 +01:00
parent 91eca4fdeb
commit b3644ad413
4 changed files with 111 additions and 122 deletions

View File

@ -273,7 +273,9 @@ class Fail2banRegex(object):
self._filter.checkFindTime = False self._filter.checkFindTime = False
self._filter.checkAllRegex = opts.checkAllRegex and not opts.out self._filter.checkAllRegex = opts.checkAllRegex and not opts.out
# ignore pending (without ID/IP), added to matches if it hits later (if ID/IP can be retrieved) # ignore pending (without ID/IP), added to matches if it hits later (if ID/IP can be retrieved)
self._filter.ignorePending = opts.out; self._filter.ignorePending = opts.out
# callback to increment ignored RE's by index (during process):
self._filter.onIgnoreRegex = self._onIgnoreRegex
self._backend = 'auto' self._backend = 'auto'
def output(self, line): def output(self, line):
@ -435,22 +437,17 @@ class Fail2banRegex(object):
'add%sRegex' % regextype.title())(regex.getFailRegex()) 'add%sRegex' % regextype.title())(regex.getFailRegex())
return True return True
def testIgnoreRegex(self, line): def _onIgnoreRegex(self, idx, ignoreRegex):
found = False self._lineIgnored = True
try: self._ignoreregex[idx].inc()
ret = self._filter.ignoreLine([(line, "", "")])
if ret is not None:
found = True
regex = self._ignoreregex[ret].inc()
except RegexException as e: # pragma: no cover
output( 'ERROR: %s' % e )
return False
return found
def testRegex(self, line, date=None): def testRegex(self, line, date=None):
orgLineBuffer = self._filter._Filter__lineBuffer orgLineBuffer = self._filter._Filter__lineBuffer
# duplicate line buffer (list can be changed inplace during processLine):
if self._filter.getMaxLines() > 1:
orgLineBuffer = orgLineBuffer[:]
fullBuffer = len(orgLineBuffer) >= self._filter.getMaxLines() fullBuffer = len(orgLineBuffer) >= self._filter.getMaxLines()
is_ignored = False is_ignored = self._lineIgnored = False
try: try:
found = self._filter.processLine(line, date) found = self._filter.processLine(line, date)
lines = [] lines = []
@ -469,6 +466,7 @@ class Fail2banRegex(object):
except RegexException as e: # pragma: no cover except RegexException as e: # pragma: no cover
output( 'ERROR: %s' % e ) output( 'ERROR: %s' % e )
return False return False
if self._filter.getMaxLines() > 1:
for bufLine in orgLineBuffer[int(fullBuffer):]: for bufLine in orgLineBuffer[int(fullBuffer):]:
if bufLine not in self._filter._Filter__lineBuffer: if bufLine not in self._filter._Filter__lineBuffer:
try: try:
@ -491,7 +489,7 @@ class Fail2banRegex(object):
if lines: # pre-lines parsed in multiline mode (buffering) if lines: # pre-lines parsed in multiline mode (buffering)
lines.append(self._filter.processedLine()) lines.append(self._filter.processedLine())
line = "\n".join(lines) line = "\n".join(lines)
return line, ret, is_ignored return line, ret, (is_ignored or self._lineIgnored)
def _prepaireOutput(self): def _prepaireOutput(self):
"""Prepares output- and fetch-function corresponding given '--out' option (format)""" """Prepares output- and fetch-function corresponding given '--out' option (format)"""
@ -558,8 +556,7 @@ class Fail2banRegex(object):
out = self._prepaireOutput() out = self._prepaireOutput()
for line in test_lines: for line in test_lines:
if isinstance(line, tuple): if isinstance(line, tuple):
line_datetimestripped, ret, is_ignored = self.testRegex( line_datetimestripped, ret, is_ignored = self.testRegex(line[0], line[1])
line[0], line[1])
line = "".join(line[0]) line = "".join(line[0])
else: else:
line = line.rstrip('\r\n') line = line.rstrip('\r\n')
@ -567,11 +564,9 @@ class Fail2banRegex(object):
# skip comment and empty lines # skip comment and empty lines
continue continue
line_datetimestripped, ret, is_ignored = self.testRegex(line) line_datetimestripped, ret, is_ignored = self.testRegex(line)
if not is_ignored:
is_ignored = self.testIgnoreRegex(line_datetimestripped)
if self._opts.out: # (formatted) output: if self._opts.out: # (formatted) output:
if len(ret) > 0: out(ret) if len(ret) > 0 and not is_ignored: out(ret)
continue continue
if is_ignored: if is_ignored:

View File

@ -107,6 +107,8 @@ class Filter(JailThread):
self.checkAllRegex = False self.checkAllRegex = False
## avoid finding of pending failures (without ID/IP, used in fail2ban-regex): ## avoid finding of pending failures (without ID/IP, used in fail2ban-regex):
self.ignorePending = True self.ignorePending = True
## callback called on ignoreregex match :
self.onIgnoreRegex = None
## if true ignores obsolete failures (failure time < now - findTime): ## if true ignores obsolete failures (failure time < now - findTime):
self.checkFindTime = True self.checkFindTime = True
## Ticks counter ## Ticks counter
@ -170,7 +172,7 @@ class Filter(JailThread):
# @param value the regular expression # @param value the regular expression
def addFailRegex(self, value): def addFailRegex(self, value):
multiLine = self.getMaxLines() > 1 multiLine = self.__lineBufferSize > 1
try: try:
regex = FailRegex(value, prefRegex=self.__prefRegex, multiline=multiLine, regex = FailRegex(value, prefRegex=self.__prefRegex, multiline=multiLine,
useDns=self.__useDns) useDns=self.__useDns)
@ -575,20 +577,33 @@ class Filter(JailThread):
""" """
if date: if date:
tupleLine = line tupleLine = line
self.__lastTimeText = tupleLine[1]
self.__lastDate = date
else: else:
l = line.rstrip('\r\n')
logSys.log(7, "Working on line %r", line) logSys.log(7, "Working on line %r", line)
(timeMatch, template) = self.dateDetector.matchTime(l) # try to parse date:
if timeMatch: timeMatch = self.dateDetector.matchTime(line)
tupleLine = ( m = timeMatch[0]
l[:timeMatch.start(1)], if m:
l[timeMatch.start(1):timeMatch.end(1)], s = m.start(1)
l[timeMatch.end(1):], e = m.end(1)
(timeMatch, template) m = line[s:e]
) tupleLine = (line[:s], m, line[e:])
if m: # found and not empty - retrieve date:
date = self.dateDetector.getTime(m, timeMatch)
if date is None:
if m: logSys.error("findFailure failed to parse timeText: %s", m)
date = self.__lastDate
else: else:
tupleLine = (l, "", "", None) # Lets get the time part
date = date[0]
self.__lastTimeText = m
self.__lastDate = date
else:
tupleLine = (line, self.__lastTimeText, "")
date = self.__lastDate
# save last line (lazy convert of process line tuple to string on demand): # save last line (lazy convert of process line tuple to string on demand):
self.processedLine = lambda: "".join(tupleLine[::2]) self.processedLine = lambda: "".join(tupleLine[::2])
@ -630,20 +645,26 @@ class Filter(JailThread):
self._errors //= 2 self._errors //= 2
self.idle = True self.idle = True
## def _ignoreLine(self, buf, orgBuffer, failRegex=None):
# Returns true if the line should be ignored. # if multi-line buffer - use matched only, otherwise (single line) - original buf:
# if failRegex and self.__lineBufferSize > 1:
# Uses ignoreregex. orgBuffer = failRegex.getMatchedTupleLines()
# @param line: the line buf = Regex._tupleLinesBuf(orgBuffer)
# @return: a boolean # search ignored:
fnd = None
def ignoreLine(self, tupleLines):
buf = Regex._tupleLinesBuf(tupleLines)
for ignoreRegexIndex, ignoreRegex in enumerate(self.__ignoreRegex): for ignoreRegexIndex, ignoreRegex in enumerate(self.__ignoreRegex):
ignoreRegex.search(buf, tupleLines) ignoreRegex.search(buf, orgBuffer)
if ignoreRegex.hasMatched(): if ignoreRegex.hasMatched():
return ignoreRegexIndex fnd = ignoreRegexIndex
return None logSys.log(7, " Matched ignoreregex %d and was ignored", fnd)
if self.onIgnoreRegex: self.onIgnoreRegex(fnd, ignoreRegex)
# remove ignored match:
if not self.checkAllRegex or self.__lineBufferSize > 1:
# todo: check ignoreRegex.getUnmatchedTupleLines() would be better (fix testGetFailuresMultiLineIgnoreRegex):
if failRegex:
self.__lineBuffer = failRegex.getUnmatchedTupleLines()
if not self.checkAllRegex: break
return fnd
def _updateUsers(self, fail, user=()): def _updateUsers(self, fail, user=()):
users = fail.get('users') users = fail.get('users')
@ -713,7 +734,7 @@ class Filter(JailThread):
# to find the logging time. # to find the logging time.
# @return a dict with IP and timestamp. # @return a dict with IP and timestamp.
def findFailure(self, tupleLine, date=None): def findFailure(self, tupleLine, date):
failList = list() failList = list()
ll = logSys.getEffectiveLevel() ll = logSys.getEffectiveLevel()
@ -723,62 +744,38 @@ class Filter(JailThread):
returnRawHost = True returnRawHost = True
cidr = IPAddr.CIDR_RAW cidr = IPAddr.CIDR_RAW
# Checks if we must ignore this line.
if self.ignoreLine([tupleLine[::2]]) is not None:
# The ignoreregex matched. Return.
if ll <= 7: logSys.log(7, "Matched ignoreregex and was \"%s\" ignored",
"".join(tupleLine[::2]))
return failList
timeText = tupleLine[1]
if date:
self.__lastTimeText = timeText
self.__lastDate = date
elif timeText:
dateTimeMatch = self.dateDetector.getTime(timeText, tupleLine[3])
if dateTimeMatch is None:
logSys.error("findFailure failed to parse timeText: %s", timeText)
date = self.__lastDate
else:
# Lets get the time part
date = dateTimeMatch[0]
self.__lastTimeText = timeText
self.__lastDate = date
else:
timeText = self.__lastTimeText or "".join(tupleLine[::2])
date = self.__lastDate
if self.checkFindTime and date is not None and date < MyTime.time() - self.getFindTime(): if self.checkFindTime and date is not None and date < MyTime.time() - self.getFindTime():
if ll <= 5: logSys.log(5, "Ignore line since time %s < %s - %s", if ll <= 5: logSys.log(5, "Ignore line since time %s < %s - %s",
date, MyTime.time(), self.getFindTime()) date, MyTime.time(), self.getFindTime())
return failList return failList
if self.__lineBufferSize > 1: if self.__lineBufferSize > 1:
orgBuffer = self.__lineBuffer = ( self.__lineBuffer.append(tupleLine)
self.__lineBuffer + [tupleLine[:3]])[-self.__lineBufferSize:] orgBuffer = self.__lineBuffer = self.__lineBuffer[-self.__lineBufferSize:]
else: else:
orgBuffer = self.__lineBuffer = [tupleLine[:3]] orgBuffer = self.__lineBuffer = [tupleLine]
if ll <= 5: logSys.log(5, "Looking for match of %r", self.__lineBuffer) if ll <= 5: logSys.log(5, "Looking for match of %r", orgBuffer)
buf = Regex._tupleLinesBuf(self.__lineBuffer) buf = Regex._tupleLinesBuf(orgBuffer)
# Checks if we must ignore this line (only if fewer ignoreregex than failregex).
if self.__ignoreRegex and len(self.__ignoreRegex) < len(self.__failRegex) - 2:
if self._ignoreLine(buf, orgBuffer) is not None:
# The ignoreregex matched. Return.
return failList
# Pre-filter fail regex (if available): # Pre-filter fail regex (if available):
preGroups = {} preGroups = {}
if self.__prefRegex: if self.__prefRegex:
if ll <= 5: logSys.log(5, " Looking for prefregex %r", self.__prefRegex.getRegex()) if ll <= 5: logSys.log(5, " Looking for prefregex %r", self.__prefRegex.getRegex())
self.__prefRegex.search(buf, self.__lineBuffer) self.__prefRegex.search(buf, orgBuffer)
if not self.__prefRegex.hasMatched(): if not self.__prefRegex.hasMatched():
if ll <= 5: logSys.log(5, " Prefregex not matched") if ll <= 5: logSys.log(5, " Prefregex not matched")
return failList return failList
preGroups = self.__prefRegex.getGroups() preGroups = self.__prefRegex.getGroups()
if ll <= 7: logSys.log(7, " Pre-filter matched %s", preGroups) if ll <= 7: logSys.log(7, " Pre-filter matched %s", preGroups)
repl = preGroups.get('content') repl = preGroups.pop('content', None)
# Content replacement: # Content replacement:
if repl: if repl:
del preGroups['content']
self.__lineBuffer, buf = [('', '', repl)], None self.__lineBuffer, buf = [('', '', repl)], None
# Iterates over all the regular expressions. # Iterates over all the regular expressions.
@ -796,14 +793,11 @@ class Filter(JailThread):
# The failregex matched. # The failregex matched.
if ll <= 7: logSys.log(7, " Matched failregex %d: %s", failRegexIndex, fail) if ll <= 7: logSys.log(7, " Matched failregex %d: %s", failRegexIndex, fail)
# Checks if we must ignore this match. # Checks if we must ignore this match.
if self.ignoreLine(failRegex.getMatchedTupleLines()) \ if self.__ignoreRegex and self._ignoreLine(buf, orgBuffer, failRegex) is not None:
is not None:
# The ignoreregex matched. Remove ignored match. # The ignoreregex matched. Remove ignored match.
self.__lineBuffer, buf = failRegex.getUnmatchedTupleLines(), None buf = None
if ll <= 7: logSys.log(7, " Matched ignoreregex and was ignored")
if not self.checkAllRegex: if not self.checkAllRegex:
break break
else:
continue continue
if date is None: if date is None:
logSys.warning( logSys.warning(
@ -814,10 +808,10 @@ class Filter(JailThread):
"file a detailed issue on" "file a detailed issue on"
" https://github.com/fail2ban/fail2ban/issues " " https://github.com/fail2ban/fail2ban/issues "
"in order to get support for this format.", "in order to get support for this format.",
"\n".join(failRegex.getMatchedLines()), timeText) "\n".join(failRegex.getMatchedLines()), tupleLine[1])
continue continue
# we should check all regex (bypass on multi-line, otherwise too complex): # we should check all regex (bypass on multi-line, otherwise too complex):
if not self.checkAllRegex or self.getMaxLines() > 1: if not self.checkAllRegex or self.__lineBufferSize > 1:
self.__lineBuffer, buf = failRegex.getUnmatchedTupleLines(), None self.__lineBuffer, buf = failRegex.getUnmatchedTupleLines(), None
# merge data if multi-line failure: # merge data if multi-line failure:
raw = returnRawHost raw = returnRawHost
@ -1056,7 +1050,7 @@ class FileFilter(Filter):
if not line or not self.active: if not line or not self.active:
# The jail reached the bottom or has been stopped # The jail reached the bottom or has been stopped
break break
self.processLineAndAdd(line) self.processLineAndAdd(line.rstrip('\r\n'))
finally: finally:
log.close() log.close()
db = self.jail.database db = self.jail.database

View File

@ -63,9 +63,6 @@ def open(*args):
if len(args) == 2: if len(args) == 2:
# ~50kB buffer should be sufficient for all tests here. # ~50kB buffer should be sufficient for all tests here.
args = args + (50000,) args = args + (50000,)
if sys.version_info >= (3,):
return fopen(*args, **{'encoding': 'utf-8', 'errors': 'ignore'})
else:
return fopen(*args) return fopen(*args)
@ -200,7 +197,7 @@ def _copy_lines_between_files(in_, fout, n=None, skip=0, mode='a', terminal_line
# polling filter could detect the change # polling filter could detect the change
mtimesleep() mtimesleep()
if isinstance(in_, str): # pragma: no branch - only used with str in test cases if isinstance(in_, str): # pragma: no branch - only used with str in test cases
fin = open(in_, 'r') fin = open(in_, 'rb')
else: else:
fin = in_ fin = in_
# Skip # Skip
@ -210,7 +207,7 @@ def _copy_lines_between_files(in_, fout, n=None, skip=0, mode='a', terminal_line
i = 0 i = 0
lines = [] lines = []
while n is None or i < n: while n is None or i < n:
l = fin.readline() l = FileContainer.decode_line(in_, 'UTF-8', fin.readline()).rstrip('\r\n')
if terminal_line is not None and l == terminal_line: if terminal_line is not None and l == terminal_line:
break break
lines.append(l) lines.append(l)
@ -238,7 +235,7 @@ def _copy_lines_to_journal(in_, fields={},n=None, skip=0, terminal_line=""): # p
Returns None Returns None
""" """
if isinstance(in_, str): # pragma: no branch - only used with str in test cases if isinstance(in_, str): # pragma: no branch - only used with str in test cases
fin = open(in_, 'r') fin = open(in_, 'rb')
else: else:
fin = in_ fin = in_
# Required for filtering # Required for filtering
@ -249,7 +246,7 @@ def _copy_lines_to_journal(in_, fields={},n=None, skip=0, terminal_line=""): # p
# Read/Write # Read/Write
i = 0 i = 0
while n is None or i < n: while n is None or i < n:
l = fin.readline() l = FileContainer.decode_line(in_, 'UTF-8', fin.readline()).rstrip('\r\n')
if terminal_line is not None and l == terminal_line: if terminal_line is not None and l == terminal_line:
break break
journal.send(MESSAGE=l.strip(), **fields) journal.send(MESSAGE=l.strip(), **fields)
@ -1583,9 +1580,9 @@ class GetFailures(LogCaptureTestCase):
# We first adjust logfile/failures to end with CR+LF # We first adjust logfile/failures to end with CR+LF
fname = tempfile.mktemp(prefix='tmp_fail2ban', suffix='crlf') fname = tempfile.mktemp(prefix='tmp_fail2ban', suffix='crlf')
# poor man unix2dos: # poor man unix2dos:
fin, fout = open(GetFailures.FILENAME_01), open(fname, 'w') fin, fout = open(GetFailures.FILENAME_01, 'rb'), open(fname, 'wb')
for l in fin.readlines(): for l in fin.read().splitlines():
fout.write('%s\r\n' % l.rstrip('\n')) fout.write(l + b'\r\n')
fin.close() fin.close()
fout.close() fout.close()

View File

@ -32,7 +32,7 @@ import sys
import time import time
import unittest import unittest
from ..server.failregex import Regex from ..server.failregex import Regex
from ..server.filter import Filter from ..server.filter import Filter, FileContainer
from ..client.filterreader import FilterReader from ..client.filterreader import FilterReader
from .utils import setUpMyTime, tearDownMyTime, TEST_NOW, CONFIG_DIR from .utils import setUpMyTime, tearDownMyTime, TEST_NOW, CONFIG_DIR
@ -157,10 +157,11 @@ def testSampleRegexsFactory(name, basedir):
while i < len(filenames): while i < len(filenames):
filename = filenames[i]; i += 1; filename = filenames[i]; i += 1;
logFile = fileinput.FileInput(os.path.join(TEST_FILES_DIR, "logs", logFile = fileinput.FileInput(os.path.join(TEST_FILES_DIR, "logs",
filename)) filename), mode='rb')
ignoreBlock = False ignoreBlock = False
for line in logFile: for line in logFile:
line = FileContainer.decode_line(logFile.filename(), 'UTF-8', line)
jsonREMatch = re.match("^#+ ?(failJSON|(?:file|filter)Options|addFILE):(.+)$", line) jsonREMatch = re.match("^#+ ?(failJSON|(?:file|filter)Options|addFILE):(.+)$", line)
if jsonREMatch: if jsonREMatch:
try: try:
@ -202,6 +203,7 @@ def testSampleRegexsFactory(name, basedir):
raise ValueError("%s: %s:%i" % raise ValueError("%s: %s:%i" %
(e, logFile.filename(), logFile.filelineno())) (e, logFile.filename(), logFile.filelineno()))
line = next(logFile) line = next(logFile)
line = FileContainer.decode_line(logFile.filename(), 'UTF-8', line)
elif ignoreBlock or line.startswith("#") or not line.strip(): elif ignoreBlock or line.startswith("#") or not line.strip():
continue continue
else: # pragma: no cover - normally unreachable else: # pragma: no cover - normally unreachable
@ -214,6 +216,7 @@ def testSampleRegexsFactory(name, basedir):
flt = self._readFilter(fltName, name, basedir, opts=None) flt = self._readFilter(fltName, name, basedir, opts=None)
self._filterTests = [(fltName, flt, {})] self._filterTests = [(fltName, flt, {})]
line = line.rstrip('\r\n')
# process line using several filter options (if specified in the test-file): # process line using several filter options (if specified in the test-file):
for fltName, flt, opts in self._filterTests: for fltName, flt, opts in self._filterTests:
# Bypass if constraint (as expression) is not valid: # Bypass if constraint (as expression) is not valid:
@ -230,7 +233,7 @@ def testSampleRegexsFactory(name, basedir):
else: # simulate journal processing, time is known from journal (formatJournalEntry): else: # simulate journal processing, time is known from journal (formatJournalEntry):
if opts.get('test.prefix-line'): # journal backends creates common prefix-line: if opts.get('test.prefix-line'): # journal backends creates common prefix-line:
line = opts.get('test.prefix-line') + line line = opts.get('test.prefix-line') + line
ret = flt.processLine(('', TEST_NOW_STR, line.rstrip('\r\n')), TEST_NOW) ret = flt.processLine(('', TEST_NOW_STR, line), TEST_NOW)
if ret: if ret:
# filter matched only (in checkAllRegex mode it could return 'nofail' too): # filter matched only (in checkAllRegex mode it could return 'nofail' too):
found = [] found = []