mirror of https://github.com/fail2ban/fail2ban
datedetector: matchTime, the template list, etc. were rewritten because of performance degradation (templates are no longer sorted) - templates are now reordered in place using their hit counts and last-used time;
todo: rewrite "reGroupDictStrptime" because it may be too slow;
pull/1346/head
parent
e065941ac5
commit
3f2b58e973
|
@ -366,7 +366,7 @@ class Fail2banRegex(object):
|
|||
|
||||
def process(self, test_lines):
|
||||
t0 = time.time()
|
||||
for line_no, line in enumerate(test_lines):
|
||||
for line in test_lines:
|
||||
if isinstance(line, tuple):
|
||||
line_datetimestripped, ret = self.testRegex(
|
||||
line[0], line[1])
|
||||
|
@ -398,8 +398,6 @@ class Fail2banRegex(object):
|
|||
self._line_stats.missed_lines_timeextracted.append(line_datetimestripped)
|
||||
self._line_stats.tested += 1
|
||||
|
||||
if line_no % 10 == 0 and self._filter.dateDetector is not None:
|
||||
self._filter.dateDetector.sortTemplate()
|
||||
self._time_elapsed = time.time() - t0
|
||||
|
||||
def printLines(self, ltype):
|
||||
|
|
|
@ -21,6 +21,8 @@ __author__ = "Cyril Jaquier and Fail2Ban Contributors"
|
|||
__copyright__ = "Copyright (c) 2004 Cyril Jaquier"
|
||||
__license__ = "GPL"
|
||||
|
||||
import time
|
||||
|
||||
from threading import Lock
|
||||
|
||||
from .datetemplate import DatePatternRegex, DateTai64n, DateEpoch
|
||||
|
@ -44,6 +46,8 @@ class DateDetector(object):
|
|||
self.__lock = Lock()
|
||||
self.__templates = list()
|
||||
self.__known_names = set()
|
||||
# time the template was long unused (currently 300 == 5m):
|
||||
self.__unusedTime = 300
|
||||
|
||||
def _appendTemplate(self, template):
|
||||
name = template.name
|
||||
|
@ -153,22 +157,29 @@ class DateDetector(object):
|
|||
The regex match returned from the first successfully matched
|
||||
template.
|
||||
"""
|
||||
self.__lock.acquire()
|
||||
try:
|
||||
i = 0
|
||||
with self.__lock:
|
||||
for template in self.__templates:
|
||||
match = template.matchDate(line)
|
||||
if not match is None:
|
||||
if logSys.getEffectiveLevel() <= logLevel:
|
||||
logSys.log(logLevel, "Matched time template %s", template.name)
|
||||
template.hits += 1
|
||||
template.lastUsed = time.time()
|
||||
# if not first - try to reorder current template (bubble up), they will be not sorted anymore:
|
||||
if i:
|
||||
self._reorderTemplate(i)
|
||||
# return tuple with match and template reference used for parsing:
|
||||
return (match, template)
|
||||
return (None, None)
|
||||
finally:
|
||||
self.__lock.release()
|
||||
i += 1
|
||||
# not found:
|
||||
return (None, None)
|
||||
|
||||
def getTime(self, line):
|
||||
"""Attempts to return the date on a log line using templates.
|
||||
|
||||
Obsolete: Use "getTime2" instead.
|
||||
|
||||
This uses the templates' `getDate` method in an attempt to find
|
||||
a date.
|
||||
|
||||
|
@ -183,8 +194,7 @@ class DateDetector(object):
|
|||
The Unix timestamp returned from the first successfully matched
|
||||
template or None if not found.
|
||||
"""
|
||||
self.__lock.acquire()
|
||||
try:
|
||||
with self.__lock:
|
||||
for template in self.__templates:
|
||||
try:
|
||||
date = template.getDate(line)
|
||||
|
@ -197,8 +207,6 @@ class DateDetector(object):
|
|||
except ValueError: # pragma: no cover
|
||||
pass
|
||||
return None
|
||||
finally:
|
||||
self.__lock.release()
|
||||
|
||||
def getTime2(self, line, timeMatch = None):
|
||||
"""Attempts to return the date on a log line using given template.
|
||||
|
@ -232,21 +240,28 @@ class DateDetector(object):
|
|||
return date
|
||||
return self.getTime(line)
|
||||
|
||||
def sortTemplate(self):
|
||||
"""Sort the date templates by number of hits
|
||||
def _reorderTemplate(self, num):
|
||||
"""Reorder template (bubble up) in template list if hits grows enough.
|
||||
|
||||
Sort the template lists using the hits score. This method is not
|
||||
called in this object and thus should be called from time to time.
|
||||
This ensures the most commonly matched templates are checked first,
|
||||
improving performance of matchTime and getTime.
|
||||
Parameters
|
||||
----------
|
||||
num : int
|
||||
Index of template should be moved.
|
||||
"""
|
||||
self.__lock.acquire()
|
||||
try:
|
||||
if logSys.getEffectiveLevel() <= logLevel:
|
||||
logSys.log(logLevel, "Sorting the template list")
|
||||
self.__templates.sort(key=lambda x: x.hits, reverse=True)
|
||||
t = self.__templates[0]
|
||||
if logSys.getEffectiveLevel() <= logLevel:
|
||||
logSys.log(logLevel, "Winning template: %s with %d hits", t.name, t.hits)
|
||||
finally:
|
||||
self.__lock.release()
|
||||
if num:
|
||||
templates = self.__templates
|
||||
template = templates[num]
|
||||
## current hits and time the template was long unused:
|
||||
untime = template.lastUsed - self.__unusedTime
|
||||
hits = template.hits
|
||||
## don't move too often (multiline logs resp. log's with different date patterns),
|
||||
## if template not used too long, replace it also :
|
||||
if hits > templates[num-1].hits + 5 or templates[num-1].lastUsed < untime:
|
||||
## try to move faster (half of part to current template):
|
||||
pos = num // 2
|
||||
## if not larger - move slow (exact 1 position):
|
||||
if hits <= templates[pos].hits or templates[pos].lastUsed < untime:
|
||||
pos = num-1
|
||||
templates[pos], templates[num] = template, templates[pos]
|
||||
|
||||
|
||||
|
|
|
@ -50,6 +50,7 @@ class DateTemplate(object):
|
|||
self._regex = ""
|
||||
self._cRegex = None
|
||||
self.hits = 0
|
||||
self.lastUsed = 0
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
|
|
|
@ -84,7 +84,6 @@ class FilterGamin(FileFilter):
|
|||
self.jail.putFailTicket(ticket)
|
||||
except FailManagerEmpty:
|
||||
self.failManager.cleanup(MyTime.time())
|
||||
self.dateDetector.sortTemplate()
|
||||
self.__modified = False
|
||||
|
||||
##
|
||||
|
|
|
@ -118,7 +118,6 @@ class FilterPoll(FileFilter):
|
|||
self.jail.putFailTicket(ticket)
|
||||
except FailManagerEmpty:
|
||||
self.failManager.cleanup(MyTime.time())
|
||||
self.dateDetector.sortTemplate()
|
||||
self.__modified = False
|
||||
logSys.debug(
|
||||
(self.jail is not None and self.jail.name or "jailless") +
|
||||
|
|
|
@ -108,7 +108,6 @@ class FilterPyinotify(FileFilter):
|
|||
self.jail.putFailTicket(ticket)
|
||||
except FailManagerEmpty:
|
||||
self.failManager.cleanup(MyTime.time())
|
||||
self.dateDetector.sortTemplate()
|
||||
self.__modified = False
|
||||
|
||||
def _addFileWatcher(self, path):
|
||||
|
|
|
@ -143,13 +143,6 @@ class DateDetectorTest(LogCaptureTestCase):
|
|||
else:
|
||||
self.assertEqual(logtime, None, "getTime should have not matched for %r Got: %s" % (sdate, logtime))
|
||||
|
||||
def testStableSortTemplate(self):
|
||||
old_names = [x.name for x in self.__datedetector.templates]
|
||||
self.__datedetector.sortTemplate()
|
||||
# If there were no hits -- sorting should not change the order
|
||||
for old_name, n in zip(old_names, self.__datedetector.templates):
|
||||
self.assertEqual(old_name, n.name) # "Sort must be stable"
|
||||
|
||||
def testAllUniqueTemplateNames(self):
|
||||
self.assertRaises(ValueError, self.__datedetector.appendTemplate,
|
||||
self.__datedetector.templates[0])
|
||||
|
@ -164,13 +157,11 @@ class DateDetectorTest(LogCaptureTestCase):
|
|||
( logTime, logMatch ) = logdate
|
||||
self.assertEqual(logTime, mu)
|
||||
self.assertEqual(logMatch.group(), '2012/10/11 02:37:17')
|
||||
self.__datedetector.sortTemplate()
|
||||
# confuse it with year being at the end
|
||||
for i in xrange(10):
|
||||
( logTime, logMatch ) = self.__datedetector.getTime('11/10/2012 02:37:17 [error] 18434#0')
|
||||
self.assertEqual(logTime, mu)
|
||||
self.assertEqual(logMatch.group(), '11/10/2012 02:37:17')
|
||||
self.__datedetector.sortTemplate()
|
||||
# and now back to the original
|
||||
( logTime, logMatch ) = self.__datedetector.getTime('2012/10/11 02:37:17 [error] 18434#0')
|
||||
self.assertEqual(logTime, mu)
|
||||
|
|
|
@ -87,8 +87,31 @@ def _maxWaitTime(wtime):
|
|||
return wtime
|
||||
|
||||
|
||||
def _tm(time):
    """Serialize a Unix timestamp as a local-time "YYYY-MM-DD HH:MM:SS" string.

    Parameters
    ----------
    time : int or float
        Seconds since the epoch.

    Returns
    -------
    str
        The timestamp formatted in the local time zone of the process.
    """
    moment = datetime.datetime.fromtimestamp(time)
    return moment.strftime("%Y-%m-%d %H:%M:%S")
|
||||
class _tmSerial():
    """Fast serializer of Unix timestamps to local "YYYY-MM-DD HH:MM:SS" strings.

    Calling strftime for every single timestamp is too slow when a large
    number of (mostly consecutive) times has to be serialized. Therefore the
    formatted prefix up to and including the minute is cached in class
    attributes, and only the seconds are formatted on every call.

    The cache is keyed by the start of the epoch minute. An hour-level cache
    derived from ``time % 3600`` is only correct when the local UTC offset is
    a whole number of hours; for offsets such as +05:30 it produces a wrong
    minute (and a stale hour), so it is intentionally not used.
    """

    # Epoch second at which the currently cached minute starts
    # (the sentinel value means "nothing cached yet").
    _last_s = -0x7fffffff
    # Kept only for backward compatibility; the hour-level cache is unused.
    _last_m = -0x7fffffff
    # Cached "YYYY-MM-DD HH:MM:" prefix that belongs to `_last_s`.
    _str_s = ""
    # Kept only for backward compatibility; the hour-level cache is unused.
    _str_m = ""

    @staticmethod
    def _tm(time):
        """Return `time` formatted as local "YYYY-MM-DD HH:MM:SS".

        Parameters
        ----------
        time : int or float
            Seconds since the epoch.

        Returns
        -------
        str
            Same text as
            ``datetime.datetime.fromtimestamp(time).strftime("%Y-%m-%d %H:%M:%S")``.
        """
        c = _tmSerial
        sec = time % 60
        minute_start = time - sec
        # Only a change of minute requires the (slow) strftime call; epoch
        # minutes line up with local minutes for whole-minute UTC offsets.
        if c._last_s != minute_start:
            c._str_s = datetime.datetime.fromtimestamp(time).strftime("%Y-%m-%d %H:%M:")
            c._last_s = minute_start
        return "%s%02u" % (c._str_s, sec)
|
||||
|
||||
_tm = _tmSerial._tm
|
||||
|
||||
|
||||
def _assert_equal_entries(utest, found, output, count=None):
|
||||
|
@ -245,6 +268,14 @@ class BasicFilter(unittest.TestCase):
|
|||
1)
|
||||
)
|
||||
|
||||
def testTest_tm(self):
    """Verify that the fast "_tm" helper returns the same text as plain strftime."""
    # NOTE(review): presumably skips this long-running check in "fast" test mode - confirm.
    unittest.F2B.SkipIfFast()
    first = 1417512352
    # exclusive end: three hours past the start of the hour that contains `first`
    last = (first // 3600 + 3) * 3600
    pattern = "%Y-%m-%d %H:%M:%S"
    for stamp in xrange(first, last):
        expected = datetime.datetime.fromtimestamp(stamp).strftime(pattern)
        # compare cheaply first; assertEqual is only invoked to report a mismatch
        if _tm(stamp) != expected:
            self.assertEqual((_tm(stamp), stamp), (expected, stamp))
|
||||
|
||||
|
||||
class IgnoreIP(LogCaptureTestCase):
|
||||
|
||||
|
|
Loading…
Reference in New Issue