From c886414e2e855c8bf982f7bfd135e5bd92d16dc0 Mon Sep 17 00:00:00 2001 From: Steven Hiscocks Date: Wed, 4 Dec 2013 22:05:17 +0000 Subject: [PATCH] ENH+BF: Capture multiline matched lines into fail ticket Previously only the last line of the match was being saved, not all lines involved in matching. Log lines are now broken into 3 part tuple, with the line pre-datetime, the datetime, and post-datetime. Allows reformation of full line, but also use of the line without the datetime present. Attempting to use the term "tupleLine(s)" where possible, to avoid confusion with normal read lines. May also wish to consider that regexs could be made to capture more lines of interest if some form of unique reference is available. This may allow more lines of interest to be captured, which may not be picked up by the traditional "grep " approach i.e. ones which do not have the ip address in. This also simplified the fail2ban-regex statistics for missed lines. Also resolved bug with missed lines time extracted for debuggex having some lines present which were captured in a multiline regex. Also resolved independent issue with ignored line check including the datetime, which raised assertion error in the rare case the datetime matched the ignore regex, and the rest of line only matched a failregex --- bin/fail2ban-regex | 34 ++++++-------- fail2ban/server/failregex.py | 56 ++++++++++++++++------ fail2ban/server/filter.py | 78 +++++++++++++++---------------- fail2ban/tests/filtertestcase.py | 15 +++--- fail2ban/tests/samplestestcase.py | 2 +- 5 files changed, 100 insertions(+), 85 deletions(-) diff --git a/bin/fail2ban-regex b/bin/fail2ban-regex index 3661ba363..bd99068dd 100755 --- a/bin/fail2ban-regex +++ b/bin/fail2ban-regex @@ -309,7 +309,7 @@ class Fail2banRegex(object): def testIgnoreRegex(self, line): found = False try: - ret = self._filter.ignoreLine(line) + ret = self._filter.ignoreLine([(line,"","")]) if ret is not None: found = True regex = self._ignoreregex[ret].inc() @@ -338,35 +338,27 @@ class Fail2banRegex(object): return False for bufLine in orgLineBuffer[int(fullBuffer):]: if bufLine not in self._filter._Filter__lineBuffer: - if self.removeMissedLine(bufLine): + try: + self._line_stats.missed_lines.pop( + self._line_stats.missed_lines.index("".join(bufLine))) + self._line_stats.missed_lines_timeextracted.pop( + self._line_stats.missed_lines_timeextracted.index( + "".join(bufLine[::2]))) + except ValueError: + pass + else: self._line_stats.matched += 1 return line, ret - def removeMissedLine(self, line): - """Remove `line` from missed lines, by comparing without time match""" - for n, missed_line in \ - enumerate(reversed(self._line_stats.missed_lines)): - timeMatch = self._filter.dateDetector.matchTime( - missed_line, incHits=False) - if timeMatch: - logLine = (missed_line[:timeMatch.start()] + - missed_line[timeMatch.end():]) - else: - logLine = missed_line - if logLine.rstrip("\r\n") == line: - self._line_stats.missed_lines.pop( - len(self._line_stats.missed_lines) - n - 1) - return True - return False - def process(self, test_lines): for line_no, line in enumerate(test_lines): - if line.startswith('#') or not line.strip(): + line = line.strip('\r\n') + if line.startswith('#') or not line: # skip comment and empty lines continue - is_ignored = fail2banRegex.testIgnoreRegex(line) line_datetimestripped, ret = fail2banRegex.testRegex(line) + is_ignored = fail2banRegex.testIgnoreRegex(line_datetimestripped) if is_ignored: self._line_stats.ignored_lines.append(line) diff --git a/fail2ban/server/failregex.py b/fail2ban/server/failregex.py index 23be3b443..f72799317 100644 --- a/fail2ban/server/failregex.py +++ b/fail2ban/server/failregex.py @@ -74,8 +74,9 @@ class Regex: # method of this object. # @param value the line - def search(self, value): - self._matchCache = self._regexObj.search(value) + def search(self, tupleLines): + self._matchCache = self._regexObj.search( + "\n".join("".join(value[::2]) for value in tupleLines) + "\n") if self.hasMatched(): # Find start of the first line where the match was found try: @@ -89,8 +90,26 @@ class Regex: "\n", self._matchCache.end() - 1) + 1 except ValueError: self._matchLineEnd = len(self._matchCache.string) - - ## + + + lineCount1 = self._matchCache.string.count( + "\n", 0, self._matchLineStart) + lineCount2 = self._matchCache.string.count( + "\n", 0, self._matchLineEnd) + self._matchedTupleLines = tupleLines[lineCount1:lineCount2] + self._unmatchedTupleLines = tupleLines[:lineCount1] + + n = 0 + for skippedLine in self.getSkippedLines(): + for m, matchedTupleLine in enumerate( + self._matchedTupleLines[n:]): + if "".join(matchedTupleLine[::2]) == skippedLine: + self._unmatchedTupleLines.append( + self._matchedTupleLines.pop(n+m)) + n += m + break + self._unmatchedTupleLines.extend(tupleLines[lineCount2:]) + # Checks if the previous call to search() matched. # # @return True if a match was found, False otherwise @@ -125,15 +144,18 @@ class Regex: # # This returns unmatched lines including captured by the tag. # @return list of unmatched lines - + + def getUnmatchedTupleLines(self): + if not self.hasMatched(): + return [] + else: + return self._unmatchedTupleLines + def getUnmatchedLines(self): if not self.hasMatched(): return [] - unmatchedLines = ( - self._matchCache.string[:self._matchLineStart].splitlines(False) - + self.getSkippedLines() - + self._matchCache.string[self._matchLineEnd:].splitlines(False)) - return unmatchedLines + else: + return ["".join(line) for line in self._unmatchedTupleLines] ## # Returns matched lines. @@ -141,14 +163,18 @@ class Regex: # This returns matched lines by excluding those captured # by the tag. # @return list of matched lines - + + def getMatchedTupleLines(self): + if not self.hasMatched(): + return [] + else: + return self._matchedTupleLines + def getMatchedLines(self): if not self.hasMatched(): return [] - matchedLines = self._matchCache.string[ - self._matchLineStart:self._matchLineEnd].splitlines(False) - return [line for line in matchedLines - if line not in self.getSkippedLines()] + else: + return ["".join(line) for line in self._matchedTupleLines] ## # Exception dedicated to the class Regex. diff --git a/fail2ban/server/filter.py b/fail2ban/server/filter.py index e2da5e77c..051135b05 100644 --- a/fail2ban/server/filter.py +++ b/fail2ban/server/filter.py @@ -366,17 +366,12 @@ class Filter(JailThread): timeMatch = self.dateDetector.matchTime(l) if timeMatch: - # Lets split into time part and log part of the line - timeText = timeMatch.group() - # Lets leave the beginning in as well, so if there is no - # anchore at the beginning of the time regexp, we don't - # at least allow injection. Should be harmless otherwise - logLine = l[:timeMatch.start()] + l[timeMatch.end():] + tupleLine = (l[:timeMatch.start()], l[timeMatch.start():timeMatch.end()] , l[timeMatch.end():]) else: - timeText = None - logLine = l + tupleLine = (l,"","") - return logLine, self.findFailure(timeText, logLine, returnRawHost, checkAllRegex) + return "".join(tupleLine[::2]), self.findFailure( + tupleLine, returnRawHost, checkAllRegex) def processLineAndAdd(self, line): """Processes the line for failures and populates failManager @@ -385,6 +380,7 @@ class Filter(JailThread): failregex = element[0] ip = element[1] unixTime = element[2] + lines = element[3] logSys.debug("Processing line with time:%s and ip:%s" % (unixTime, ip)) if unixTime < MyTime.time() - self.getFindTime(): @@ -396,7 +392,7 @@ class Filter(JailThread): continue logSys.debug("Found %s" % ip) ## print "D: Adding a ticket for %s" % ((ip, unixTime, [line]),) - self.failManager.addFailure(FailTicket(ip, unixTime, [line])) + self.failManager.addFailure(FailTicket(ip, unixTime, lines)) ## # Returns true if the line should be ignored. @@ -405,9 +401,9 @@ class Filter(JailThread): # @param line: the line # @return: a boolean - def ignoreLine(self, line): + def ignoreLine(self, tupleLines): for ignoreRegexIndex, ignoreRegex in enumerate(self.__ignoreRegex): - ignoreRegex.search(line) + ignoreRegex.search(tupleLines) if ignoreRegex.hasMatched(): return ignoreRegexIndex return None @@ -419,17 +415,17 @@ class Filter(JailThread): # to find the logging time. # @return a dict with IP and timestamp. - def findFailure(self, timeText, logLine, - returnRawHost=False, checkAllRegex=False): + def findFailure(self, tupleLine, returnRawHost=False, checkAllRegex=False): failList = list() # Checks if we must ignore this line. - if self.ignoreLine(logLine) is not None: + if self.ignoreLine([tupleLine[::2]]) is not None: # The ignoreregex matched. Return. - logSys.log(7, "Matched ignoreregex and was \"%s\" ignored", logLine) + logSys.log(7, "Matched ignoreregex and was \"%s\" ignored", + "".join(tupleLine[::2])) return failList - + timeText = tupleLine[1] if timeText: dateTimeMatch = self.dateDetector.getTime(timeText) @@ -446,49 +442,53 @@ class Filter(JailThread): self.__lastTimeText = timeText self.__lastDate = date else: - timeText = self.__lastTimeText or logLine + timeText = self.__lastTimeText or "".join(tupleLine[::2]) date = self.__lastDate - self.__lineBuffer = (self.__lineBuffer + [logLine])[-self.__lineBufferSize:] - - logLine = "\n".join(self.__lineBuffer) + "\n" + self.__lineBuffer = ( + self.__lineBuffer + [tupleLine])[-self.__lineBufferSize:] # Iterates over all the regular expressions. for failRegexIndex, failRegex in enumerate(self.__failRegex): - failRegex.search(logLine) + failRegex.search(self.__lineBuffer) if failRegex.hasMatched(): + # The failregex matched. + logSys.log(7, "Matched %s", failRegex) # Checks if we must ignore this match. - if self.ignoreLine( - "\n".join(failRegex.getMatchedLines()) + "\n") \ + if self.ignoreLine(failRegex.getMatchedTupleLines()) \ is not None: # The ignoreregex matched. Remove ignored match. - self.__lineBuffer = failRegex.getUnmatchedLines() + self.__lineBuffer = failRegex.getUnmatchedTupleLines() logSys.log(7, "Matched ignoreregex and was ignored") - continue - # The failregex matched. - logSys.log(7, "Matched %s", failRegex) + if not checkAllRegex: + break + else: + continue if date is None: - logSys.debug("Found a match for %r but no valid date/time " - "found for %r. Please try setting a custom " - "date pattern (see man page jail.conf(5)). " - "If format is complex, please " - "file a detailed issue on" - " https://github.com/fail2ban/fail2ban/issues " - "in order to get support for this format." - % (logLine, timeText)) + logSys.debug( + "Found a match for %r but no valid date/time " + "found for %r. Please try setting a custom " + "date pattern (see man page jail.conf(5)). " + "If format is complex, please " + "file a detailed issue on" + " https://github.com/fail2ban/fail2ban/issues " + "in order to get support for this format." + % ("\n".join(failRegex.getMatchedLines()), timeText)) else: - self.__lineBuffer = failRegex.getUnmatchedLines() + self.__lineBuffer = failRegex.getUnmatchedTupleLines() try: host = failRegex.getHost() if returnRawHost: - failList.append([failRegexIndex, host, date]) + failList.append([failRegexIndex, host, date, + failRegex.getMatchedLines()]) if not checkAllRegex: break else: ipMatch = DNSUtils.textToIp(host, self.__useDns) if ipMatch: for ip in ipMatch: - failList.append([failRegexIndex, ip, date]) + failList.append([failRegexIndex, ip, date, + failRegex.getMatchedLines()]) if not checkAllRegex: break except RegexException, e: # pragma: no cover - unsure if reachable diff --git a/fail2ban/tests/filtertestcase.py b/fail2ban/tests/filtertestcase.py index 85d0ec167..9b6ca0d03 100644 --- a/fail2ban/tests/filtertestcase.py +++ b/fail2ban/tests/filtertestcase.py @@ -710,7 +710,7 @@ class GetFailures(unittest.TestCase): # so that they could be reused by other tests FAILURES_01 = ('193.168.0.128', 3, 1124017199.0, - [u'Aug 14 11:59:59 [sshd] error: PAM: Authentication failure for kevin from 193.168.0.128\n']*3) + [u'Aug 14 11:59:59 [sshd] error: PAM: Authentication failure for kevin from 193.168.0.128']*3) def setUp(self): """Call before every test case.""" @@ -747,16 +747,13 @@ class GetFailures(unittest.TestCase): fout.close() # now see if we should be getting the "same" failures - self.testGetFailures01(filename=fname, - failures=GetFailures.FAILURES_01[:3] + - ([x.rstrip('\n') + '\r\n' for x in - GetFailures.FAILURES_01[-1]],)) + self.testGetFailures01(filename=fname) _killfile(fout, fname) def testGetFailures02(self): output = ('141.3.81.106', 4, 1124017139.0, - [u'Aug 14 11:%d:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:141.3.81.106 port 51332 ssh2\n' + [u'Aug 14 11:%d:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:141.3.81.106 port 51332 ssh2' % m for m in 53, 54, 57, 58]) self.filter.addLogPath(GetFailures.FILENAME_02) @@ -789,11 +786,11 @@ class GetFailures(unittest.TestCase): def testGetFailuresUseDNS(self): # We should still catch failures with usedns = no ;-) output_yes = ('93.184.216.119', 2, 1124017139.0, - [u'Aug 14 11:54:59 i60p295 sshd[12365]: Failed publickey for roehl from example.com port 51332 ssh2\n', - u'Aug 14 11:58:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:93.184.216.119 port 51332 ssh2\n']) + [u'Aug 14 11:54:59 i60p295 sshd[12365]: Failed publickey for roehl from example.com port 51332 ssh2', + u'Aug 14 11:58:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:93.184.216.119 port 51332 ssh2']) output_no = ('93.184.216.119', 1, 1124017139.0, - [u'Aug 14 11:58:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:93.184.216.119 port 51332 ssh2\n']) + [u'Aug 14 11:58:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:93.184.216.119 port 51332 ssh2']) # Actually no exception would be raised -- it will be just set to 'no' #self.assertRaises(ValueError, diff --git a/fail2ban/tests/samplestestcase.py b/fail2ban/tests/samplestestcase.py index 6787ca6ec..f616ba1ea 100644 --- a/fail2ban/tests/samplestestcase.py +++ b/fail2ban/tests/samplestestcase.py @@ -119,7 +119,7 @@ def testSampleRegexsFactory(name): (map(lambda x: x[0], ret),logFile.filename(), logFile.filelineno())) # Verify timestamp and host as expected - failregex, host, fail2banTime = ret[0] + failregex, host, fail2banTime, lines = ret[0] self.assertEqual(host, faildata.get("host", None)) t = faildata.get("time", None)