diff --git a/fail2ban/server/filter.py b/fail2ban/server/filter.py index dae5c527..2dfeb174 100644 --- a/fail2ban/server/filter.py +++ b/fail2ban/server/filter.py @@ -24,6 +24,7 @@ __license__ = "GPL" import codecs import fcntl import locale +import logging import os import re import sys @@ -190,6 +191,7 @@ class Filter(JailThread): # @param value the time def setFindTime(self, value): + value = MyTime.str2seconds(value) self.__findTime = value self.failManager.setMaxTime(value) logSys.info("Set findtime = %s" % value) @@ -651,7 +653,7 @@ class FileFilter(Filter): # MyTime.time()-self.findTime. When a failure is detected, a FailTicket # is created and is added to the FailManager. - def getFailures(self, filename): + def getFailures(self, filename, startTime=None): log = self.getLog(filename) if log is None: logSys.error("Unable to get failures in " + filename) @@ -673,6 +675,11 @@ class FileFilter(Filter): logSys.exception(e) return False + # prevent completely read of big files first time (after start of service), initial seek to start time using half-interval search algorithm: + if log.getPos() == 0 and startTime is not None: + # startTime = MyTime.time() - self.getFindTime() + self.seekToTime(log, startTime) + # yoh: has_content is just a bool, so do not expect it to # change -- loop is exited upon break, and is not entered at # all if upon container opening that one was empty. 
def seekToTime(self, container, date):
    """Position `container` near the first log line dated at/after `date`.

    Method of FileFilter. Performs a half-interval (binary) search over the
    byte range of the file so that a large log does not have to be read from
    the beginning on the first poll.

    @param container  file container providing getFileSize(), seek() and
                      readline()
    @param date       start time as unix timestamp (seconds)

    Returns nothing; the side effect is the final container.seek(pos, False)
    which leaves the container at the chosen position.
    """
    # Imported locally: only needed for debug formatting, and the module's
    # import block is not guaranteed to provide `datetime`.
    import datetime

    def _fmt(stamp):
        # Human readable form of a unix timestamp for the debug messages.
        return datetime.datetime.fromtimestamp(stamp).strftime("%Y-%m-%d %H:%M:%S")

    fs = container.getFileSize()
    if logSys.getEffectiveLevel() <= logging.DEBUG:
        logSys.debug("Seek to find time %s (%s), file size %s", date,
            _fmt(date), fs)
    # NOTE(review): small negative offset, presumably a tolerance for
    # sub-second rounding of parsed timestamps -- confirm.
    date -= 0.009
    minp = 0
    maxp = fs
    lastpos = 0       # position examined in the most recent successful step
    lastFew = 0       # last position whose time was still before `date`
    lastTime = None   # time found at `lastFew`
    cntr = 0          # number of seeks performed (reported in debug output)
    unixTime = None
    lasti = 0
    movecntr = 3      # how often the search may stall before giving up
    while maxp > minp:
        i = minp + (maxp - minp) // 2
        # container.seek() skips to the end of the current line and returns
        # the real position reached.
        pos = container.seek(i)
        cntr += 1
        # within next 5 lines try to find any legal datetime:
        lncntr = 5
        dateTimeMatch = None
        llen = 0
        i = pos
        while True:
            line = container.readline()
            if not line:
                break
            # NOTE(review): llen is accumulated via len() of what readline()
            # returns; if that is decoded text rather than raw bytes, the
            # offsets may drift for multi-byte encodings -- confirm.
            llen += len(line)
            text = line.rstrip('\r\n')
            timeMatch = self.dateDetector.matchTime(text)
            if timeMatch:
                dateTimeMatch = self.dateDetector.getTime(
                    text[timeMatch.start():timeMatch.end()])
            if not dateTimeMatch and lncntr:
                lncntr -= 1
                continue
            break
        # if we can't move (position not changed)
        if i + llen == lasti:
            movecntr -= 1
            if movecntr <= 0:
                break
        lasti = i + llen
        # not found at this step - stop searching
        if not dateTimeMatch:
            break
        unixTime = dateTimeMatch[0]
        if unixTime >= date:
            # found time is not before the target: continue in the lower half
            maxp = i
        else:
            # found time is before the target: continue in the upper half and
            # remember this position as a safe fallback
            minp = i + llen
            lastFew = pos
            lastTime = unixTime
        lastpos = pos
    # if the found position has a time greater than the given one - use the
    # smallest time we have found
    if unixTime is None or unixTime > date:
        unixTime = lastTime
        lastpos = container.seek(lastFew, False)
    else:
        lastpos = container.seek(lastpos, False)
    if logSys.getEffectiveLevel() <= logging.DEBUG:
        logSys.debug("Position %s from %s, found time %s (%s) within %s seeks", lastpos, fs, unixTime,
            (_fmt(unixTime) if unixTime is not None else ''), cntr)
def getFileSize(self):
    """Return the size in bytes of the file behind this container.

    Method of FileContainer; the size is looked up by file name via
    os.path.getsize().
    """
    path = self.__filename
    return os.path.getsize(path)

def seek(self, offs, endLine=True):
    """Move the underlying file handler to `offs` and report where it ended up.

    Method of FileContainer.

    @param offs     absolute offset (from the start of the file) to seek to
    @param endLine  when true, one readline() is issued after seeking so the
                    handler is advanced past the rest of the current line
    @return the handler's position (tell()) after the move
    """
    handler = self.__handler
    # absolute positioning from the beginning of the file
    handler.seek(offs, os.SEEK_SET)
    if endLine:
        # consume the remainder of the line we landed in
        handler.readline()
    return handler.tell()
above but with seek to 'Aug 14 11:55:04' - so other output ... + output = ('203.162.223.135', 5, 1124013544.0) + + self.filter.addLogPath(GetFailures.FILENAME_03) + self.filter.addFailRegex("error,relay=,.*550 User unknown") + self.filter.getFailures(GetFailures.FILENAME_03, output[2] - 4*60 + 1) + _assert_correct_last_attempt(self, self.filter, output) + def testGetFailures04(self): output = [('212.41.96.186', 4, 1124013600.0), ('212.41.96.185', 4, 1124017198.0)]