prevent completely read of big files first time (after start of service), initial seek to start time using half-interval search algorithm (see issue #795)

pull/716/head
sebres 2014-09-16 13:50:32 +02:00
parent 62c755c1d5
commit 53a30a2d42
5 changed files with 104 additions and 5 deletions

View File

@ -21,7 +21,7 @@ __author__ = "Cyril Jaquier and Fail2Ban Contributors"
__copyright__ = "Copyright (c) 2004 Cyril Jaquier, 2011-2013 Yaroslav Halchenko" __copyright__ = "Copyright (c) 2004 Cyril Jaquier, 2011-2013 Yaroslav Halchenko"
__license__ = "GPL" __license__ = "GPL"
import re, os, fcntl, sys, locale, codecs, datetime import re, os, fcntl, sys, locale, codecs, datetime, logging
from .failmanager import FailManagerEmpty, FailManager from .failmanager import FailManagerEmpty, FailManager
from .observer import Observers from .observer import Observers
@ -653,7 +653,7 @@ class FileFilter(Filter):
# MyTime.time()-self.findTime. When a failure is detected, a FailTicket # MyTime.time()-self.findTime. When a failure is detected, a FailTicket
# is created and is added to the FailManager. # is created and is added to the FailManager.
def getFailures(self, filename): def getFailures(self, filename, startTime = None):
container = self.getFileContainer(filename) container = self.getFileContainer(filename)
if container is None: if container is None:
logSys.error("Unable to get failures in " + filename) logSys.error("Unable to get failures in " + filename)
@ -675,6 +675,11 @@ class FileFilter(Filter):
logSys.exception(e) logSys.exception(e)
return False return False
# prevent completely read of big files first time (after start of service), initial seek to start time using half-interval search algorithm:
if container.getPos() == 0 and startTime is not None:
# startTime = MyTime.time() - self.getFindTime()
self.seekToTime(container, startTime)
# yoh: has_content is just a bool, so do not expect it to # yoh: has_content is just a bool, so do not expect it to
# change -- loop is exited upon break, and is not entered at # change -- loop is exited upon break, and is not entered at
# all if upon container opening that one was empty. If we # all if upon container opening that one was empty. If we
@ -692,6 +697,76 @@ class FileFilter(Filter):
db.updateLog(self.jail, container) db.updateLog(self.jail, container)
return True return True
##
# Seeks to line with date (search using half-interval search algorithm), to start polling from it
#
def seekToTime(self, container, date):
fs = container.getFileSize()
if logSys.getEffectiveLevel() <= logging.DEBUG:
logSys.debug("Seek to find time %s (%s), file size %s", date,
datetime.datetime.fromtimestamp(date).strftime("%Y-%m-%d %H:%M:%S"), fs)
date -= 0.009
minp = 0
maxp = fs
lastpos = 0
lastFew = 0
lastTime = None
cntr = 0
unixTime = None
lasti = 0
movecntr = 3
while maxp > minp:
i = minp + (maxp - minp) / 2
pos = container.seek(i)
cntr += 1
# within next 5 lines try to find any legal datetime:
lncntr = 5;
dateTimeMatch = None
llen = 0
i = pos
while True:
line = container.readline()
if not line:
break
llen += len(line)
l = line.rstrip('\r\n')
timeMatch = self.dateDetector.matchTime(l)
if timeMatch:
dateTimeMatch = self.dateDetector.getTime(l[timeMatch.start():timeMatch.end()])
if not dateTimeMatch and lncntr:
lncntr -= 1
continue
break
# if we can't move (position not changed)
if i + llen == lasti:
movecntr -= 1
if movecntr <= 0:
break
lasti = i + llen;
# not found at this step - stop searching
if not dateTimeMatch:
break
unixTime = dateTimeMatch[0]
if round(unixTime) == round(date):
break
if unixTime >= date:
maxp = i
else:
minp = i + llen
lastFew = pos;
lastTime = unixTime
lastpos = pos
# if found position have a time greater as given - use smallest time we have found
if unixTime is None or unixTime > date:
unixTime = lastTime
lastpos = container.seek(lastFew, False)
else:
lastpos = container.seek(lastpos, False)
if logSys.getEffectiveLevel() <= logging.DEBUG:
logSys.debug("Position %s from %s, found time %s (%s) within %s seeks", lastpos, fs, unixTime,
(datetime.datetime.fromtimestamp(unixTime).strftime("%Y-%m-%d %H:%M:%S") if unixTime is not None else ''), cntr)
@property @property
def status(self): def status(self):
"""Status of Filter plus files being monitored. """Status of Filter plus files being monitored.
@ -744,6 +819,9 @@ class FileContainer:
def getFileName(self): def getFileName(self):
return self.__filename return self.__filename
def getFileSize(self):
return os.path.getsize(self.__filename);
def setEncoding(self, encoding): def setEncoding(self, encoding):
codecs.lookup(encoding) # Raises LookupError if invalid codecs.lookup(encoding) # Raises LookupError if invalid
self.__encoding = encoding self.__encoding = encoding
@ -790,6 +868,16 @@ class FileContainer:
self.__handler.seek(self.__pos) self.__handler.seek(self.__pos)
return True return True
def seek(self, offs, endLine = True):
h = self.__handler
# seek to given position
h.seek(offs, 0)
# goto end of next line
if endLine:
h.readline()
# get current real position
return h.tell()
def readline(self): def readline(self):
if self.__handler is None: if self.__handler is None:
return "" return ""

View File

@ -76,7 +76,7 @@ class FilterGamin(FileFilter):
TODO -- RF: TODO -- RF:
this is a common logic and must be shared/provided by FileFilter this is a common logic and must be shared/provided by FileFilter
""" """
self.getFailures(path) self.getFailures(path, MyTime.time() - self.getFindTime())
try: try:
while True: while True:
ticket = self.failManager.toBan() ticket = self.failManager.toBan()

View File

@ -83,6 +83,7 @@ class FilterPoll(FileFilter):
# @return True when the thread exits nicely # @return True when the thread exits nicely
def run(self): def run(self):
cntr = 0
while self.active: while self.active:
if logSys.getEffectiveLevel() <= 6: if logSys.getEffectiveLevel() <= 6:
logSys.log(6, "Woke up idle=%s with %d files monitored", logSys.log(6, "Woke up idle=%s with %d files monitored",
@ -92,7 +93,8 @@ class FilterPoll(FileFilter):
for container in self.getLogPath(): for container in self.getLogPath():
filename = container.getFileName() filename = container.getFileName()
if self.isModified(filename): if self.isModified(filename):
self.getFailures(filename) self.getFailures(filename, (MyTime.time() - self.getFindTime()) if not cntr else None)
cntr += 1
self.__modified = True self.__modified = True
if self.__modified: if self.__modified:

View File

@ -102,7 +102,7 @@ class FilterPyinotify(FileFilter):
TODO -- RF: TODO -- RF:
this is a common logic and must be shared/provided by FileFilter this is a common logic and must be shared/provided by FileFilter
""" """
self.getFailures(path) self.getFailures(path, MyTime.time() - self.getFindTime())
try: try:
while True: while True:
ticket = self.failManager.toBan() ticket = self.failManager.toBan()

View File

@ -872,6 +872,15 @@ class GetFailures(unittest.TestCase):
self.filter.getFailures(GetFailures.FILENAME_03) self.filter.getFailures(GetFailures.FILENAME_03)
_assert_correct_last_attempt(self, self.filter, output) _assert_correct_last_attempt(self, self.filter, output)
def testGetFailures03_seek(self):
# same test as above but with seek to 'Aug 14 11:55:04' - so other output ...
output = ('203.162.223.135', 5, 1124013544.0)
self.filter.addLogPath(GetFailures.FILENAME_03)
self.filter.addFailRegex("error,relay=<HOST>,.*550 User unknown")
self.filter.getFailures(GetFailures.FILENAME_03, output[2] - 4*60 + 1)
_assert_correct_last_attempt(self, self.filter, output)
def testGetFailures04(self): def testGetFailures04(self):
output = [('212.41.96.186', 4, 1124013600.0), output = [('212.41.96.186', 4, 1124013600.0),
('212.41.96.185', 4, 1124017198.0)] ('212.41.96.185', 4, 1124017198.0)]