mirror of https://github.com/fail2ban/fail2ban
Optimized for better use of the last-matched time template (the same part of the line is parsed as last time, if it is enclosed in the same boundaries)
The rule "shortest distance to datetime should win" is still followed, so a possible collision causes a search through all templates; this search is sped up a little (no collision is possible if the distance is <= 1 or if the template is line-begin anchored, so the search stops as soon as such a template is found) pull/1583/head
parent
b9033d004e
commit
f56ff5f48b
|
@ -26,7 +26,7 @@ import time
|
|||
|
||||
from threading import Lock
|
||||
|
||||
from .datetemplate import DatePatternRegex, DateTai64n, DateEpoch
|
||||
from .datetemplate import DateTemplate, DatePatternRegex, DateTai64n, DateEpoch
|
||||
from ..helpers import getLogger
|
||||
|
||||
# Gets the instance of the logger.
|
||||
|
@ -142,6 +142,10 @@ class DateDetectorTemplate(object):
|
|||
# the last distance to date-match within the log file:
|
||||
self.distance = 0x7fffffff
|
||||
|
||||
@property
|
||||
def weight(self):
|
||||
return self.hits * self.template.weight / max(1, self.distance)
|
||||
|
||||
def __getattr__(self, name):
|
||||
""" Returns attribute of template (called for parameters not in slots)
|
||||
"""
|
||||
|
@ -158,13 +162,14 @@ class DateDetector(object):
|
|||
_defCache = DateDetectorCache()
|
||||
|
||||
def __init__(self):
|
||||
self.__lock = Lock()
|
||||
self.__templates = list()
|
||||
self.__known_names = set()
|
||||
# time the template was long unused (currently 300 == 5m):
|
||||
self.__unusedTime = 300
|
||||
# last known distance:
|
||||
self.__lastDistance = 0
|
||||
# last known distance (bypass one char collision) and end position:
|
||||
self.__lastPos = 1, None
|
||||
self.__lastEndPos = 0x7fffffff, None
|
||||
self.__lastTemplIdx = 0x7fffffff
|
||||
# first free place:
|
||||
self.__firstUnused = 0
|
||||
|
||||
|
@ -198,9 +203,8 @@ class DateDetector(object):
|
|||
def addDefaultTemplate(self):
|
||||
"""Add Fail2Ban's default set of date templates.
|
||||
"""
|
||||
with self.__lock:
|
||||
for template in DateDetector._defCache.templates:
|
||||
self._appendTemplate(template)
|
||||
for template in DateDetector._defCache.templates:
|
||||
self._appendTemplate(template)
|
||||
|
||||
@property
|
||||
def templates(self):
|
||||
|
@ -226,29 +230,59 @@ class DateDetector(object):
|
|||
The regex match returned from the first successfully matched
|
||||
template.
|
||||
"""
|
||||
#logSys.log(logLevel, "try to match time for line: %.250s", line)
|
||||
match = None
|
||||
i = 0
|
||||
found = None, 0x7fffffff, -1
|
||||
with self.__lock:
|
||||
# first try to use last template with same start/end position:
|
||||
i = self.__lastTemplIdx
|
||||
if i < len(self.__templates):
|
||||
ddtempl = self.__templates[i]
|
||||
template = ddtempl.template
|
||||
distance, endpos = self.__lastPos[0], self.__lastEndPos[0]
|
||||
# check same boundaries left/right, otherwise possible collision/pattern switch:
|
||||
if (line[distance-1:distance] == self.__lastPos[1] and
|
||||
line[endpos:endpos+1] == self.__lastEndPos[1]
|
||||
):
|
||||
if logSys.getEffectiveLevel() <= logLevel-1:
|
||||
logSys.log(logLevel-1, " try to match last template #%02i (from %r to %r): ... %s ...",
|
||||
i, distance, endpos, line[distance:endpos])
|
||||
match = template.matchDate(line, distance, endpos)
|
||||
if match:
|
||||
distance = match.start()
|
||||
endpos = match.end()
|
||||
# if different position, possible collision/pattern switch:
|
||||
if distance == self.__lastPos[0] and endpos == self.__lastEndPos[0]:
|
||||
logSys.log(logLevel, " matched last time template #%02i", i)
|
||||
else:
|
||||
logSys.log(logLevel, " ** last pattern collision - pattern change, search ...")
|
||||
match = None
|
||||
# search template and better match:
|
||||
if not match:
|
||||
self.__lastTemplIdx = 0x7fffffff
|
||||
logSys.log(logLevel, " search template ...")
|
||||
found = None, 0x7fffffff, -1
|
||||
i = 0
|
||||
for ddtempl in self.__templates:
|
||||
template = ddtempl.template
|
||||
match = template.matchDate(line)
|
||||
if match is not None:
|
||||
distance = max(1, match.start() + 1)
|
||||
if match:
|
||||
distance = match.start()
|
||||
endpos = match.end()
|
||||
if logSys.getEffectiveLevel() <= logLevel:
|
||||
logSys.log(logLevel, " matched time template #%r (at %r <= %r, %r) %s",
|
||||
i, distance, ddtempl.distance, self.__lastDistance, template.name)
|
||||
logSys.log(logLevel, " matched time template #%02i (at %r <= %r, %r) %s",
|
||||
i, distance, ddtempl.distance, self.__lastPos[0], template.name)
|
||||
## if line-begin anchored - stop searching:
|
||||
if template.flags & DateTemplate.LINE_BEGIN:
|
||||
break
|
||||
## [grave] if distance changed, possible date-match was found somewhere
|
||||
## in body of message, so save this template, and search further:
|
||||
if (
|
||||
(distance > ddtempl.distance or distance > self.__lastDistance) and
|
||||
(distance > ddtempl.distance or distance > self.__lastPos[0]) and
|
||||
len(self.__templates) > 1
|
||||
):
|
||||
if logSys.getEffectiveLevel() <= logLevel:
|
||||
logSys.log(logLevel, " ** distance collision - pattern change, reserve")
|
||||
logSys.log(logLevel, " ** distance collision - pattern change, reserve")
|
||||
## shortest of both:
|
||||
if distance < found[1]:
|
||||
found = match, distance, i
|
||||
found = match, distance, endpos, i
|
||||
## search further:
|
||||
match = None
|
||||
i += 1
|
||||
|
@ -257,25 +291,29 @@ class DateDetector(object):
|
|||
break
|
||||
i += 1
|
||||
# check other template was found (use this one with shortest distance):
|
||||
if match is None and found[0]:
|
||||
match, distance, i = found
|
||||
if not match and found[0]:
|
||||
match, distance, endpos, i = found
|
||||
logSys.log(logLevel, " use best time template #%02i", i)
|
||||
ddtempl = self.__templates[i]
|
||||
template = ddtempl.template
|
||||
# we've winner, incr hits, set distance, usage, reorder, etc:
|
||||
if match is not None:
|
||||
ddtempl.hits += 1
|
||||
self.__lastDistance = ddtempl.distance = distance
|
||||
ddtempl.lastUsed = time.time()
|
||||
if self.__firstUnused == i:
|
||||
self.__firstUnused += 1
|
||||
# if not first - try to reorder current template (bubble up), they will be not sorted anymore:
|
||||
if i:
|
||||
logSys.log(logLevel, " -> reorder template #%r, hits: %r", i, ddtempl.hits)
|
||||
self._reorderTemplate(i)
|
||||
# return tuple with match and template reference used for parsing:
|
||||
return (match, template)
|
||||
# we've winner, incr hits, set distance, usage, reorder, etc:
|
||||
if match:
|
||||
ddtempl.hits += 1
|
||||
ddtempl.lastUsed = time.time()
|
||||
ddtempl.distance = distance
|
||||
if self.__firstUnused == i:
|
||||
self.__firstUnused += 1
|
||||
self.__lastPos = distance, line[distance-1:distance]
|
||||
self.__lastEndPos = endpos, line[endpos:endpos+1]
|
||||
# if not first - try to reorder current template (bubble up), they will be not sorted anymore:
|
||||
if i:
|
||||
i = self._reorderTemplate(i)
|
||||
self.__lastTemplIdx = i
|
||||
# return tuple with match and template reference used for parsing:
|
||||
return (match, template)
|
||||
|
||||
# not found:
|
||||
logSys.log(logLevel, " no template.")
|
||||
return (None, None)
|
||||
|
||||
def getTime(self, line, timeMatch=None):
|
||||
|
@ -311,7 +349,8 @@ class DateDetector(object):
|
|||
date[0], date[1].group(), template.name)
|
||||
return date
|
||||
except ValueError:
|
||||
return None
|
||||
pass
|
||||
return None
|
||||
|
||||
def _reorderTemplate(self, num):
|
||||
"""Reorder template (bubble up) in template list if hits grows enough.
|
||||
|
@ -324,29 +363,36 @@ class DateDetector(object):
|
|||
if num:
|
||||
templates = self.__templates
|
||||
ddtempl = templates[num]
|
||||
if logSys.getEffectiveLevel() <= logLevel:
|
||||
logSys.log(logLevel, " -> reorder template #%02i, hits: %r", num, ddtempl.hits)
|
||||
## current hits and time the template was long unused:
|
||||
untime = ddtempl.lastUsed - self.__unusedTime
|
||||
weight = ddtempl.hits * ddtempl.template.weight / ddtempl.distance
|
||||
weight = ddtempl.weight
|
||||
## try to move faster (first if unused available, or half of part to current template position):
|
||||
pos = self.__firstUnused if self.__firstUnused < num else num // 2
|
||||
pweight = templates[pos].hits * templates[pos].template.weight / templates[pos].distance
|
||||
pweight = templates[pos].weight
|
||||
## don't move too often (multiline logs resp. log's with different date patterns),
|
||||
## if template not used too long, replace it also :
|
||||
logSys.log(logLevel, " -> compare template #%r & #%r, weight %r > %r, hits %r > %r",
|
||||
num, pos, weight, pweight, ddtempl.hits, templates[pos].hits)
|
||||
if logSys.getEffectiveLevel() <= logLevel:
|
||||
logSys.log(logLevel, " -> compare template #%02i & #%02i, weight %.3f > %.3f, hits %r > %r",
|
||||
num, pos, weight, pweight, ddtempl.hits, templates[pos].hits)
|
||||
if not pweight or weight > pweight or templates[pos].lastUsed < untime:
|
||||
## if not larger (and target position recently used) - move slow (exact 1 position):
|
||||
if weight <= pweight and templates[pos].lastUsed > untime:
|
||||
pos = num-1
|
||||
## if still smaller and template at position used, don't move:
|
||||
pweight = templates[pos].hits * templates[pos].template.weight / templates[pos].distance
|
||||
logSys.log(logLevel, " -> compare template #%r & #%r, weight %r > %r, hits %r > %r",
|
||||
num, pos, weight, pweight, ddtempl.hits, templates[pos].hits)
|
||||
pweight = templates[pos].weight
|
||||
if logSys.getEffectiveLevel() <= logLevel:
|
||||
logSys.log(logLevel, " -> compare template #%02i & #%02i, weight %.3f > %.3f, hits %r > %r",
|
||||
num, pos, weight, pweight, ddtempl.hits, templates[pos].hits)
|
||||
if weight < pweight and templates[pos].lastUsed > untime:
|
||||
return
|
||||
del templates[num]
|
||||
templates[pos:0] = [ddtempl]
|
||||
## correct first unused:
|
||||
if pos == self.__firstUnused:
|
||||
while self.__firstUnused < len(templates) and templates[self.__firstUnused].hits:
|
||||
self.__firstUnused += 1
|
||||
logSys.log(logLevel, " -> moved template #%r -> #%r", num, pos)
|
||||
if logSys.getEffectiveLevel() <= logLevel:
|
||||
logSys.log(logLevel, " -> moved template #%02i -> #%02i", num, pos)
|
||||
return pos
|
||||
return num
|
||||
|
|
|
@ -48,9 +48,14 @@ class DateTemplate(object):
|
|||
regex
|
||||
"""
|
||||
|
||||
LINE_BEGIN = 8
|
||||
WORD_BEGIN = 2
|
||||
WORD_END = 1
|
||||
|
||||
def __init__(self):
|
||||
self.name = ""
|
||||
self.weight = 1.0
|
||||
self.flags = 0
|
||||
self._regex = ""
|
||||
self._cRegex = None
|
||||
|
||||
|
@ -83,12 +88,14 @@ class DateTemplate(object):
|
|||
regex = regex.strip()
|
||||
# if word or line start boundary:
|
||||
if wordBegin and not RE_NO_WRD_BOUND_BEG.search(regex):
|
||||
self.flags |= DateTemplate.WORD_BEGIN if wordBegin != 'start' else DateTemplate.LINE_BEGIN
|
||||
regex = (r'(?=^|\b|\W)' if wordBegin != 'start' else r"(?:^|(?<=^\W)|(?<=^\W{2}))") + regex
|
||||
self.name = ('{*WD-BEG}' if wordBegin != 'start' else '{^LN-BEG}') + self.name
|
||||
# if word end boundary:
|
||||
if wordEnd and not RE_NO_WRD_BOUND_END.search(regex):
|
||||
self.flags |= DateTemplate.WORD_END
|
||||
regex += r'(?=\b|\W|$)'
|
||||
self.name += ('{*WD-END}' if wordEnd else '')
|
||||
self.name += '{*WD-END}'
|
||||
# remove possible special pattern "**" in front and end of regex:
|
||||
regex = RE_DEL_WRD_BOUNDS.sub('', regex)
|
||||
self._regex = regex
|
||||
|
@ -97,12 +104,18 @@ class DateTemplate(object):
|
|||
"""Regex used to search for date.
|
||||
""")
|
||||
|
||||
def matchDate(self, line):
|
||||
"""Check if regex for date matches on a log line.
|
||||
def _compileRegex(self):
|
||||
"""Compile regex by first usage.
|
||||
"""
|
||||
if not self._cRegex:
|
||||
self._cRegex = re.compile(self.regex, re.UNICODE | re.IGNORECASE)
|
||||
dateMatch = self._cRegex.search(line)
|
||||
|
||||
def matchDate(self, line, *args):
|
||||
"""Check if regex for date matches on a log line.
|
||||
"""
|
||||
if not self._cRegex:
|
||||
self._compileRegex()
|
||||
dateMatch = self._cRegex.search(line, *args); # pos, endpos
|
||||
return dateMatch
|
||||
|
||||
@abstractmethod
|
||||
|
|
|
@ -42,17 +42,20 @@ class DateDetectorTest(LogCaptureTestCase):
|
|||
def setUp(self):
|
||||
"""Call before every test case."""
|
||||
LogCaptureTestCase.setUp(self)
|
||||
self.__old_eff_level = datedetector.logLevel
|
||||
datedetector.logLevel = logSys.getEffectiveLevel()
|
||||
setUpMyTime()
|
||||
self.__datedetector = DateDetector()
|
||||
self.__datedetector.addDefaultTemplate()
|
||||
self.__datedetector = None
|
||||
|
||||
def tearDown(self):
|
||||
"""Call after every test case."""
|
||||
LogCaptureTestCase.tearDown(self)
|
||||
datedetector.logLevel = self.__old_eff_level
|
||||
tearDownMyTime()
|
||||
|
||||
@property
|
||||
def datedetector(self):
|
||||
if self.__datedetector is None:
|
||||
self.__datedetector = DateDetector()
|
||||
self.__datedetector.addDefaultTemplate()
|
||||
return self.__datedetector
|
||||
|
||||
def testGetEpochTime(self):
|
||||
# correct epoch time, using all variants:
|
||||
|
@ -60,7 +63,7 @@ class DateDetectorTest(LogCaptureTestCase):
|
|||
for date in ("%s", "[%s]", "[%s.555]", "audit(%s.555:101)"):
|
||||
date = date % dateUnix
|
||||
log = date + " [sshd] error: PAM: Authentication failure"
|
||||
datelog = self.__datedetector.getTime(log)
|
||||
datelog = self.datedetector.getTime(log)
|
||||
self.assertTrue(datelog, "Parse epoch time for %s failed" % (date,))
|
||||
( datelog, matchlog ) = datelog
|
||||
self.assertEqual(int(datelog), dateUnix)
|
||||
|
@ -70,7 +73,7 @@ class DateDetectorTest(LogCaptureTestCase):
|
|||
for date in ("%s", "[%s]", "[%s.555]", "audit(%s.555:101)"):
|
||||
date = date % dateUnix
|
||||
log = date + " [sshd] error: PAM: Authentication failure"
|
||||
datelog = self.__datedetector.getTime(log)
|
||||
datelog = self.datedetector.getTime(log)
|
||||
self.assertFalse(datelog)
|
||||
|
||||
def testGetTime(self):
|
||||
|
@ -80,7 +83,7 @@ class DateDetectorTest(LogCaptureTestCase):
|
|||
# is not correctly determined atm, since year is not present
|
||||
# in the log entry. Since this doesn't effect the operation
|
||||
# of fail2ban -- we just ignore incorrect day of the week
|
||||
( datelog, matchlog ) = self.__datedetector.getTime(log)
|
||||
( datelog, matchlog ) = self.datedetector.getTime(log)
|
||||
self.assertEqual(datelog, dateUnix)
|
||||
self.assertEqual(matchlog.group(), 'Jan 23 21:59:59')
|
||||
|
||||
|
@ -140,7 +143,7 @@ class DateDetectorTest(LogCaptureTestCase):
|
|||
if not bound and prefix == "word-boundary": continue
|
||||
logSys.debug(' -- test %-5s for %r', should_match, log)
|
||||
# with getTime:
|
||||
logtime = self.__datedetector.getTime(log)
|
||||
logtime = self.datedetector.getTime(log)
|
||||
if should_match:
|
||||
self.assertNotEqual(logtime, None,
|
||||
"getTime retrieved nothing: failure for %s by prefix %r, anchored: %r, log: %s" % ( sdate, prefix, anchored, log))
|
||||
|
@ -152,8 +155,8 @@ class DateDetectorTest(LogCaptureTestCase):
|
|||
self.assertEqual(logtime, None,
|
||||
"getTime should have not matched for %r by prefix %r Got: %s" % (sdate, prefix, logtime))
|
||||
# with getTime(matchTime) - this combination used in filter:
|
||||
(timeMatch, template) = matchTime = self.__datedetector.matchTime(log)
|
||||
logtime = self.__datedetector.getTime(log, matchTime)
|
||||
(timeMatch, template) = matchTime = self.datedetector.matchTime(log)
|
||||
logtime = self.datedetector.getTime(log, matchTime)
|
||||
logSys.debug(' -- found - %r', template.name if timeMatch else False)
|
||||
if should_match:
|
||||
self.assertNotEqual(logtime, None,
|
||||
|
@ -168,26 +171,26 @@ class DateDetectorTest(LogCaptureTestCase):
|
|||
logSys.debug(' -- OK')
|
||||
|
||||
def testAllUniqueTemplateNames(self):
|
||||
self.assertRaises(ValueError, self.__datedetector.appendTemplate,
|
||||
self.__datedetector.templates[0])
|
||||
self.assertRaises(ValueError, self.datedetector.appendTemplate,
|
||||
self.datedetector.templates[0])
|
||||
|
||||
def testFullYearMatch_gh130(self):
|
||||
# see https://github.com/fail2ban/fail2ban/pull/130
|
||||
# yoh: unfortunately this test is not really effective to reproduce the
|
||||
# situation but left in place to assure consistent behavior
|
||||
mu = time.mktime(datetime.datetime(2012, 10, 11, 2, 37, 17).timetuple())
|
||||
logdate = self.__datedetector.getTime('2012/10/11 02:37:17 [error] 18434#0')
|
||||
logdate = self.datedetector.getTime('2012/10/11 02:37:17 [error] 18434#0')
|
||||
self.assertNotEqual(logdate, None)
|
||||
( logTime, logMatch ) = logdate
|
||||
self.assertEqual(logTime, mu)
|
||||
self.assertEqual(logMatch.group(), '2012/10/11 02:37:17')
|
||||
# confuse it with year being at the end
|
||||
for i in xrange(10):
|
||||
( logTime, logMatch ) = self.__datedetector.getTime('11/10/2012 02:37:17 [error] 18434#0')
|
||||
( logTime, logMatch ) = self.datedetector.getTime('11/10/2012 02:37:17 [error] 18434#0')
|
||||
self.assertEqual(logTime, mu)
|
||||
self.assertEqual(logMatch.group(), '11/10/2012 02:37:17')
|
||||
# and now back to the original
|
||||
( logTime, logMatch ) = self.__datedetector.getTime('2012/10/11 02:37:17 [error] 18434#0')
|
||||
( logTime, logMatch ) = self.datedetector.getTime('2012/10/11 02:37:17 [error] 18434#0')
|
||||
self.assertEqual(logTime, mu)
|
||||
self.assertEqual(logMatch.group(), '2012/10/11 02:37:17')
|
||||
|
||||
|
@ -199,8 +202,7 @@ class DateDetectorTest(LogCaptureTestCase):
|
|||
self.assertEqual(t.matchDate('aaaac').group(), 'aaaac')
|
||||
|
||||
def testAmbiguousInOrderedTemplates(self):
|
||||
dd = DateDetector()
|
||||
dd.addDefaultTemplate()
|
||||
dd = self.datedetector
|
||||
for (debit, line, cnt) in (
|
||||
# shortest distance to datetime should win:
|
||||
("030324 0:03:59", "some free text 030324 0:03:59 -- 2003-03-07 17:05:01 ...", 1),
|
||||
|
@ -224,6 +226,25 @@ class DateDetectorTest(LogCaptureTestCase):
|
|||
self.assertTrue(match)
|
||||
self.assertEqual(match.group(), debit)
|
||||
|
||||
def testLowLevelLogging(self):
|
||||
# test coverage for the deep (heavy) debug messages:
|
||||
try:
|
||||
self.__old_eff_level = datedetector.logLevel
|
||||
if datedetector.logLevel < logSys.getEffectiveLevel()+1:
|
||||
datedetector.logLevel = logSys.getEffectiveLevel()+1
|
||||
dd = self.datedetector
|
||||
i = 0
|
||||
for (line, cnt) in (
|
||||
("server mysqld[5906]: 2005-10-07 06:09:%02i 5907 [Warning] Access denied", 2),
|
||||
("server mysqld[5906]: 051007 06:10:%02i 5907 [Warning] Access denied", 5),
|
||||
("server mysqld[5906]: 2005-10-07 06:09:%02i 5907 [Warning] Access denied", 10),
|
||||
):
|
||||
for i in range(i, i+cnt+1):
|
||||
logSys.debug('== test: %r', (line % i, cnt))
|
||||
match, template = dd.matchTime(line % i)
|
||||
self.assertTrue(match)
|
||||
finally:
|
||||
datedetector.logLevel = self.__old_eff_level
|
||||
|
||||
iso8601 = DatePatternRegex("%Y-%m-%d[T ]%H:%M:%S(?:\.%f)?%z")
|
||||
|
||||
|
|
Loading…
Reference in New Issue