From 53a30a2d42fa9cc114335cc7328651637f12162c Mon Sep 17 00:00:00 2001 From: sebres Date: Tue, 16 Sep 2014 13:50:32 +0200 Subject: [PATCH] prevent completely read of big files first time (after start of service), initial seek to start time using half-interval search algorithm (see issue #795) --- fail2ban/server/filter.py | 92 +++++++++++++++++++++++++++++- fail2ban/server/filtergamin.py | 2 +- fail2ban/server/filterpoll.py | 4 +- fail2ban/server/filterpyinotify.py | 2 +- fail2ban/tests/filtertestcase.py | 9 +++ 5 files changed, 104 insertions(+), 5 deletions(-) diff --git a/fail2ban/server/filter.py b/fail2ban/server/filter.py index e235909b..d63fef5f 100644 --- a/fail2ban/server/filter.py +++ b/fail2ban/server/filter.py @@ -21,7 +21,7 @@ __author__ = "Cyril Jaquier and Fail2Ban Contributors" __copyright__ = "Copyright (c) 2004 Cyril Jaquier, 2011-2013 Yaroslav Halchenko" __license__ = "GPL" -import re, os, fcntl, sys, locale, codecs, datetime +import re, os, fcntl, sys, locale, codecs, datetime, logging from .failmanager import FailManagerEmpty, FailManager from .observer import Observers @@ -653,7 +653,7 @@ class FileFilter(Filter): # MyTime.time()-self.findTime. When a failure is detected, a FailTicket # is created and is added to the FailManager. - def getFailures(self, filename): + def getFailures(self, filename, startTime = None): container = self.getFileContainer(filename) if container is None: logSys.error("Unable to get failures in " + filename) @@ -675,6 +675,11 @@ class FileFilter(Filter): logSys.exception(e) return False + # prevent completely read of big files first time (after start of service), initial seek to start time using half-interval search algorithm: + if container.getPos() == 0 and startTime is not None: + # startTime = MyTime.time() - self.getFindTime() + self.seekToTime(container, startTime) + # yoh: has_content is just a bool, so do not expect it to # change -- loop is exited upon break, and is not entered at # all if upon container opening that one was empty. If we @@ -692,6 +697,76 @@ class FileFilter(Filter): db.updateLog(self.jail, container) return True + ## + # Seeks to line with date (search using half-interval search algorithm), to start polling from it + # + + def seekToTime(self, container, date): + fs = container.getFileSize() + if logSys.getEffectiveLevel() <= logging.DEBUG: + logSys.debug("Seek to find time %s (%s), file size %s", date, + datetime.datetime.fromtimestamp(date).strftime("%Y-%m-%d %H:%M:%S"), fs) + date -= 0.009 + minp = 0 + maxp = fs + lastpos = 0 + lastFew = 0 + lastTime = None + cntr = 0 + unixTime = None + lasti = 0 + movecntr = 3 + while maxp > minp: + i = minp + (maxp - minp) / 2 + pos = container.seek(i) + cntr += 1 + # within next 5 lines try to find any legal datetime: + lncntr = 5; + dateTimeMatch = None + llen = 0 + i = pos + while True: + line = container.readline() + if not line: + break + llen += len(line) + l = line.rstrip('\r\n') + timeMatch = self.dateDetector.matchTime(l) + if timeMatch: + dateTimeMatch = self.dateDetector.getTime(l[timeMatch.start():timeMatch.end()]) + if not dateTimeMatch and lncntr: + lncntr -= 1 + continue + break + # if we can't move (position not changed) + if i + llen == lasti: + movecntr -= 1 + if movecntr <= 0: + break + lasti = i + llen; + # not found at this step - stop searching + if not dateTimeMatch: + break + unixTime = dateTimeMatch[0] + if round(unixTime) == round(date): + break + if unixTime >= date: + maxp = i + else: + minp = i + llen + lastFew = pos; + lastTime = unixTime + lastpos = pos + # if found position have a time greater as given - use smallest time we have found + if unixTime is None or unixTime > date: + unixTime = lastTime + lastpos = container.seek(lastFew, False) + else: + lastpos = container.seek(lastpos, False) + if logSys.getEffectiveLevel() <= logging.DEBUG: + logSys.debug("Position %s from %s, found time %s (%s) within %s seeks", lastpos, fs, unixTime, + (datetime.datetime.fromtimestamp(unixTime).strftime("%Y-%m-%d %H:%M:%S") if unixTime is not None else ''), cntr) + @property def status(self): """Status of Filter plus files being monitored. @@ -744,6 +819,9 @@ class FileContainer: def getFileName(self): return self.__filename + def getFileSize(self): + return os.path.getsize(self.__filename); + def setEncoding(self, encoding): codecs.lookup(encoding) # Raises LookupError if invalid self.__encoding = encoding @@ -790,6 +868,16 @@ class FileContainer: self.__handler.seek(self.__pos) return True + def seek(self, offs, endLine = True): + h = self.__handler + # seek to given position + h.seek(offs, 0) + # goto end of next line + if endLine: + h.readline() + # get current real position + return h.tell() + def readline(self): if self.__handler is None: return "" diff --git a/fail2ban/server/filtergamin.py b/fail2ban/server/filtergamin.py index c9e8e31c..6d80d403 100644 --- a/fail2ban/server/filtergamin.py +++ b/fail2ban/server/filtergamin.py @@ -76,7 +76,7 @@ class FilterGamin(FileFilter): TODO -- RF: this is a common logic and must be shared/provided by FileFilter """ - self.getFailures(path) + self.getFailures(path, MyTime.time() - self.getFindTime()) try: while True: ticket = self.failManager.toBan() diff --git a/fail2ban/server/filterpoll.py b/fail2ban/server/filterpoll.py index f37be431..90fd2b20 100644 --- a/fail2ban/server/filterpoll.py +++ b/fail2ban/server/filterpoll.py @@ -83,6 +83,7 @@ class FilterPoll(FileFilter): # @return True when the thread exits nicely def run(self): + cntr = 0 while self.active: if logSys.getEffectiveLevel() <= 6: logSys.log(6, "Woke up idle=%s with %d files monitored", @@ -92,7 +93,8 @@ class FilterPoll(FileFilter): for container in self.getLogPath(): filename = container.getFileName() if self.isModified(filename): - self.getFailures(filename) + self.getFailures(filename, (MyTime.time() - self.getFindTime()) if not cntr else None) + cntr += 1 self.__modified = True if self.__modified: diff --git a/fail2ban/server/filterpyinotify.py b/fail2ban/server/filterpyinotify.py index 784a7e53..0fa3b026 100644 --- a/fail2ban/server/filterpyinotify.py +++ b/fail2ban/server/filterpyinotify.py @@ -102,7 +102,7 @@ class FilterPyinotify(FileFilter): TODO -- RF: this is a common logic and must be shared/provided by FileFilter """ - self.getFailures(path) + self.getFailures(path, MyTime.time() - self.getFindTime()) try: while True: ticket = self.failManager.toBan() diff --git a/fail2ban/tests/filtertestcase.py b/fail2ban/tests/filtertestcase.py index 1fa3116e..a462cf80 100644 --- a/fail2ban/tests/filtertestcase.py +++ b/fail2ban/tests/filtertestcase.py @@ -872,6 +872,15 @@ class GetFailures(unittest.TestCase): self.filter.getFailures(GetFailures.FILENAME_03) _assert_correct_last_attempt(self, self.filter, output) + def testGetFailures03_seek(self): + # same test as above but with seek to 'Aug 14 11:55:04' - so other output ... + output = ('203.162.223.135', 5, 1124013544.0) + + self.filter.addLogPath(GetFailures.FILENAME_03) + self.filter.addFailRegex("error,relay=,.*550 User unknown") + self.filter.getFailures(GetFailures.FILENAME_03, output[2] - 4*60 + 1) + _assert_correct_last_attempt(self, self.filter, output) + def testGetFailures04(self): output = [('212.41.96.186', 4, 1124013600.0), ('212.41.96.185', 4, 1124017198.0)]