From a7d9de8c5264b20f1fa63071f3f4eeed8735b54a Mon Sep 17 00:00:00 2001 From: sebres Date: Tue, 27 Sep 2016 17:53:45 +0200 Subject: [PATCH 01/19] [temp commit] 1st try to optimize datedetector/datetemplate functionality (fix ambiguous resp. misleading date detection if several formats used in log resp. by format switch after restart of some services): * Misleading date patterns defined more precisely (using extended syntax %E[mdHMS] for exact two-digit match) * `filter.d/freeswitch.conf` - Optional prefixes (server, daemon, dual time) if systemd daemon logs used (gh-1548) - User part rewritten to accept IPv6 resp. domain after "@" (gh-1548) --- config/filter.d/freeswitch.conf | 15 +++- fail2ban/server/datedetector.py | 99 +++++++++++++++------- fail2ban/server/datetemplate.py | 32 +++---- fail2ban/server/strptime.py | 16 ++++ fail2ban/tests/datedetectortestcase.py | 112 ++++++++++++++----------- fail2ban/tests/files/logs/freeswitch | 5 ++ 6 files changed, 181 insertions(+), 98 deletions(-) diff --git a/config/filter.d/freeswitch.conf b/config/filter.d/freeswitch.conf index 1ef5a256..29940240 100644 --- a/config/filter.d/freeswitch.conf +++ b/config/filter.d/freeswitch.conf @@ -8,10 +8,21 @@ # IP addresses on your LAN. # +[INCLUDES] + +# Read common prefixes. If any customizations available -- read them from +# common.local +before = common.conf + [Definition] -failregex = ^\.\d+ \[WARNING\] sofia_reg\.c:\d+ SIP auth (failure|challenge) \((REGISTER|INVITE)\) on sofia profile \'[^']+\' for \[.*\] from ip $ - ^\.\d+ \[WARNING\] sofia_reg\.c:\d+ Can't find user \[\d+@\d+\.\d+\.\d+\.\d+\] from $ +_daemon = freeswitch + +# Prefix contains common prefix line (server, daemon, etc.) and 2 datetimes if used systemd backend +_pref_line = ^%(__prefix_line)s(?:\d+-\d+-\d+ \d+:\d+:\d+\.\d+)? + +failregex = %(_pref_line)s \[WARNING\] sofia_reg\.c:\d+ SIP auth (failure|challenge) \((REGISTER|INVITE)\) on sofia profile \'[^']+\' for \[[^\]]*\] from ip $ + %(_pref_line)s \[WARNING\] sofia_reg\.c:\d+ Can't find user \[[^@]+@[^\]]+\] from $ ignoreregex = diff --git a/fail2ban/server/datedetector.py b/fail2ban/server/datedetector.py index 5281fc59..b1f97af7 100644 --- a/fail2ban/server/datedetector.py +++ b/fail2ban/server/datedetector.py @@ -21,6 +21,7 @@ __author__ = "Cyril Jaquier and Fail2Ban Contributors" __copyright__ = "Copyright (c) 2004 Cyril Jaquier" __license__ = "GPL" +import copy import time from threading import Lock @@ -35,6 +36,8 @@ logLevel = 6 class DateDetectorCache(object): + """Implements the caching of the default templates list. + """ def __init__(self): self.__lock = Lock() self.__templates = list() @@ -51,26 +54,44 @@ class DateDetectorCache(object): def _cacheTemplate(self, template): """Cache Fail2Ban's default template. + """ if isinstance(template, str): + # exact given template with word benin-end boundary: template = DatePatternRegex(template) + # additional template, that prefers datetime at start of a line (safety+performance feature): + template2 = copy.copy(template) + if hasattr(template, 'pattern'): + regex = template.pattern + wordEnd = True + else: + regex = template.regex + wordEnd = False + template2.setRegex(regex, wordBegin='start', wordEnd=wordEnd) + if template2.name != template.name: + self.__templates.append(template2) + # add template: self.__templates.append(template) def _addDefaultTemplate(self): """Add resp. cache Fail2Ban's default set of date templates. """ + # ISO 8601, simple date, optional subsecond and timezone: + # 2005-01-23T21:59:59.981746, 2005-01-23 21:59:59 + # simple date: 2005/01/23 21:59:59 + # custom for syslog-ng 2006.12.21 06:43:20 + self._cacheTemplate("%Y(?P<_sep>[-/.])%m(?P=_sep)%d[T ]%H:%M:%S(?:[.,]%f)?(?:\s*%z)?") + # 20050123T215959, 20050123 215959 + self._cacheTemplate("%Y%Em%Ed[T ]%EH%EM%ES(?:[.,]%f)?(?:\s*%z)?") # asctime with optional day, subsecond and/or year: # Sun Jan 23 21:59:59.011 2005 - self._cacheTemplate("(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %Y)?") + # prefixed with optional time zone (monit): + # PDT Apr 16 21:05:29 + self._cacheTemplate("(?:%z )?(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %Y)?") # asctime with optional day, subsecond and/or year coming after day # http://bugs.debian.org/798923 # Sun Jan 23 2005 21:59:59.011 self._cacheTemplate("(?:%a )?%b %d %Y %H:%M:%S(?:\.%f)?") - # simple date, optional subsecond (proftpd): - # 2005-01-23 21:59:59 - # simple date: 2005/01/23 21:59:59 - # custom for syslog-ng 2006.12.21 06:43:20 - self._cacheTemplate("%Y(?P<_sep>[-/.])%m(?P=_sep)%d %H:%M:%S(?:,%f)?") # simple date too (from x11vnc): 23/01/2005 21:59:59 # and with optional year given by 2 digits: 23/01/05 21:59:59 # (See http://bugs.debian.org/537610) @@ -79,37 +100,48 @@ class DateDetectorCache(object): # Apache format optional time zone: # [31/Oct/2006:09:22:55 -0000] # 26-Jul-2007 15:20:52 + # named 26-Jul-2007 15:20:52.252 + # roundcube 26-Jul-2007 15:20:52 +0200 self._cacheTemplate("%d(?P<_sep>[-/])%b(?P=_sep)%Y[ :]?%H:%M:%S(?:\.%f)?(?: %z)?") # CPanel 05/20/2008:01:57:39 self._cacheTemplate("%m/%d/%Y:%H:%M:%S") - # named 26-Jul-2007 15:20:52.252 - # roundcube 26-Jul-2007 15:20:52 +0200 # 01-27-2012 16:22:44.252 # subseconds explicit to avoid possible %m<->%d confusion - # with previous - self._cacheTemplate("%m-%d-%Y %H:%M:%S\.%f") + # with previous ("%d-%m-%Y %H:%M:%S" by "%d(?P<_sep>[-/])%m(?P=_sep)(?:%Y|%y) %H:%M:%S") + self._cacheTemplate("%m-%d-%Y %H:%M:%S(?:\.%f)?") # TAI64N - template = DateTai64n() - template.name = "TAI64N" - self._cacheTemplate(template) + self._cacheTemplate(DateTai64n()) # Epoch - template = DateEpoch() - template.name = "Epoch" - self._cacheTemplate(template) - # ISO 8601 - self._cacheTemplate("%Y-%m-%d[T ]%H:%M:%S(?:\.%f)?(?:%z)?") + self._cacheTemplate(DateEpoch()) # Only time information in the log self._cacheTemplate("^%H:%M:%S") # <09/16/08@05:03:30> self._cacheTemplate("^<%m/%d/%y@%H:%M:%S>") # MySQL: 130322 11:46:11 - self._cacheTemplate("^%y%m%d ?%H:%M:%S") + self._cacheTemplate("%y%Em%Ed ?%H:%M:%S") # Apache Tomcat self._cacheTemplate("%b %d, %Y %I:%M:%S %p") # ASSP: Apr-27-13 02:33:06 self._cacheTemplate("^%b-%d-%y %H:%M:%S") +class DateDetectorTemplate(object): + """Used for "shallow copy" of the template object. + + Prevents collectively usage of hits/lastUsed in cached templates + """ + __slots__ = ('template', 'hits', 'lastUsed') + def __init__(self, template): + self.template = template + self.hits = 0 + self.lastUsed = 0 + + def __getattr__(self, name): + """ Returns attribute of template (called for parameters not in slots) + """ + return getattr(self.template, name) + + class DateDetector(object): """Manages one or more date templates to find a date within a log line. @@ -132,7 +164,7 @@ class DateDetector(object): raise ValueError( "There is already a template with name %s" % name) self.__known_names.add(name) - self.__templates.append(template) + self.__templates.append(DateDetectorTemplate(template)) def appendTemplate(self, template): """Add a date template to manage and use in search of dates. @@ -186,13 +218,14 @@ class DateDetector(object): """ i = 0 with self.__lock: - for template in self.__templates: + for ddtemplate in self.__templates: + template = ddtemplate.template match = template.matchDate(line) if not match is None: if logSys.getEffectiveLevel() <= logLevel: logSys.log(logLevel, "Matched time template %s", template.name) - template.hits += 1 - template.lastUsed = time.time() + ddtemplate.hits += 1 + ddtemplate.lastUsed = time.time() # if not first - try to reorder current template (bubble up), they will be not sorted anymore: if i: self._reorderTemplate(i) @@ -234,7 +267,8 @@ class DateDetector(object): except ValueError: return None with self.__lock: - for template in self.__templates: + for ddtemplate in self.__templates: + template = ddtemplate.template try: date = template.getDate(line) if date is None: @@ -261,14 +295,21 @@ class DateDetector(object): ## current hits and time the template was long unused: untime = template.lastUsed - self.__unusedTime hits = template.hits + ## try to move faster (first 2 if it still unused, or half of part to current template position): + phits = 0 + for pos in (0, 1, num // 2): + phits = templates[pos].hits + if not phits: + break ## don't move too often (multiline logs resp. log's with different date patterns), ## if template not used too long, replace it also : - if hits > templates[num-1].hits + 5 or templates[num-1].lastUsed < untime: - ## try to move faster (half of part to current template): - pos = num // 2 - ## if not larger - move slow (exact 1 position): - if hits <= templates[pos].hits or templates[pos].lastUsed < untime: + if not phits or hits > phits + 5 or templates[pos].lastUsed < untime: + ## if not larger (and target position recently used) - move slow (exact 1 position): + if hits <= phits and templates[pos].lastUsed > untime: pos = num-1 + ## if still smaller and template at position used, don't move: + if hits < templates[pos].hits and templates[pos].lastUsed > untime: + return templates[pos], templates[num] = template, templates[pos] diff --git a/fail2ban/server/datetemplate.py b/fail2ban/server/datetemplate.py index 4f609e78..5fcc16a4 100644 --- a/fail2ban/server/datetemplate.py +++ b/fail2ban/server/datetemplate.py @@ -27,7 +27,7 @@ __license__ = "GPL" import re from abc import abstractmethod -from .strptime import reGroupDictStrptime, timeRE +from .strptime import reGroupDictStrptime, timeRE, getTimePatternRE from ..helpers import getLogger logSys = getLogger(__name__) @@ -49,8 +49,6 @@ class DateTemplate(object): self._name = "" self._regex = "" self._cRegex = None - self.hits = 0 - self.lastUsed = 0 @property def name(self): @@ -88,9 +86,11 @@ class DateTemplate(object): """ regex = regex.strip() if wordBegin and not re.search(r'^\^', regex): - regex = r'(?=^|\b|\W)' + regex + regex = (r'(?<=^|\b)' if wordBegin != 'start' else r"^(?<=\W)?") + regex + self._name = ('[*WD-BEG]' if wordBegin != 'start' else '[^LN-BEG]') + self._name if wordEnd and not re.search(r'\$$', regex): regex += r'(?=\b|\W|$)' + self._name += ('[*WD-END]' if wordEnd else '') self._regex = regex regex = property(getRegex, setRegex, doc= @@ -140,7 +140,9 @@ class DateEpoch(DateTemplate): def __init__(self): DateTemplate.__init__(self) - self.regex = r"(?:^|(?P(?<=^\[))|(?P(?<=audit\()))\d{10,11}\b(?:\.\d{3,6})?(?:(?(selinux)(?=:\d+\)))|(?(square)(?=\])))" + self.name = "Epoch" + self.setRegex(r"(?:^|(?P(?<=^\[))|(?P(?<=\baudit\()))\d{10,11}\b(?:\.\d{3,6})?(?:(?(selinux)(?=:\d+\)))|(?(square)(?=\])))", + wordBegin=False) ;# already line begin resp. word begin anchored def getDate(self, line, dateMatch=None): """Method to return the date for a log line. @@ -178,7 +180,8 @@ class DatePatternRegex(DateTemplate): regex pattern """ - _patternRE = re.compile(r"%%(%%|[%s])" % "".join(timeRE.keys())) + + _patternRE = re.compile(getTimePatternRE()) _patternName = { 'a': "DAY", 'A': "DAYNAME", 'b': "MON", 'B': "MONTH", 'd': "Day", 'H': "24hour", 'I': "12hour", 'j': "Yearday", 'm': "Month", @@ -188,11 +191,11 @@ class DatePatternRegex(DateTemplate): for _key in set(timeRE) - set(_patternName): # may not have them all... _patternName[_key] = "%%%s" % _key - def __init__(self, pattern=None): + def __init__(self, pattern=None, **kwargs): super(DatePatternRegex, self).__init__() self._pattern = None if pattern is not None: - self.pattern = pattern + self.setRegex(pattern, **kwargs) @property def pattern(self): @@ -208,17 +211,13 @@ class DatePatternRegex(DateTemplate): @pattern.setter def pattern(self, pattern): + self.setRegex(pattern) + + def setRegex(self, pattern, wordBegin=True, wordEnd=True): self._pattern = pattern fmt = self._patternRE.sub(r'%(\1)s', pattern) self._name = fmt % self._patternName - super(DatePatternRegex, self).setRegex(fmt % timeRE) - - def setRegex(self, value): - raise NotImplementedError("Regex derived from pattern") - - @DateTemplate.name.setter - def name(self, value): - raise NotImplementedError("Name derived from pattern") + super(DatePatternRegex, self).setRegex(fmt % timeRE, wordBegin, wordEnd) def getDate(self, line, dateMatch=None): """Method to return the date for a log line. @@ -258,6 +257,7 @@ class DateTai64n(DateTemplate): def __init__(self): DateTemplate.__init__(self) + self.name = "TAI64N" # We already know the format for TAI64N # yoh: we should not add an additional front anchor self.setRegex("@[0-9a-f]{24}", wordBegin=False) diff --git a/fail2ban/server/strptime.py b/fail2ban/server/strptime.py index 2e3c051c..c6a1f37e 100644 --- a/fail2ban/server/strptime.py +++ b/fail2ban/server/strptime.py @@ -26,8 +26,24 @@ from .mytime import MyTime locale_time = LocaleTime() timeRE = TimeRE() +#todo: implement literal time zone support like CET, PST, PDT, etc (via pytz): +#timeRE['z'] = r"%s?(?PZ|[+-]\d{2}(?::?[0-5]\d)?|[A-Z]{3})?" % timeRE['Z'] timeRE['z'] = r"(?PZ|[+-]\d{2}(?::?[0-5]\d)?)" +# Extend build-in TimeRE with some exact (two-digit) patterns: +timeRE['Ed'] = r"(?P3[0-1]|[1-2]\d|0[1-9])" +timeRE['Em'] = r"(?P1[0-2]|0[1-9])" +timeRE['EH'] = r"(?P2[0-3]|[0-1]\d)" +timeRE['EM'] = r"(?P[0-5]\d)" +timeRE['ES'] = r"(?P6[0-1]|[0-5]\d)" + +def getTimePatternRE(): + keys = timeRE.keys() + return (r"%%(%%|%s|[%s])" % ( + "|".join([k for k in keys if len(k) > 1]), + "".join([k for k in keys if len(k) == 1]), + )) + def reGroupDictStrptime(found_dict): """Return time from dictionary of strptime fields diff --git a/fail2ban/tests/datedetectortestcase.py b/fail2ban/tests/datedetectortestcase.py index 0c3c306d..013e1e85 100644 --- a/fail2ban/tests/datedetectortestcase.py +++ b/fail2ban/tests/datedetectortestcase.py @@ -89,69 +89,79 @@ class DateDetectorTest(LogCaptureTestCase): """ dateUnix = 1106513999.0 - for anchored, sdate in ( - (False, "Jan 23 21:59:59"), - (False, "Sun Jan 23 21:59:59 2005"), - (False, "Sun Jan 23 21:59:59"), - (False, "Sun Jan 23 2005 21:59:59"), - (False, "2005/01/23 21:59:59"), - (False, "2005.01.23 21:59:59"), - (False, "23/01/2005 21:59:59"), - (False, "23/01/05 21:59:59"), - (False, "23/Jan/2005:21:59:59"), - (False, "23/Jan/2005:21:59:59 +0100"), - (False, "01/23/2005:21:59:59"), - (False, "2005-01-23 21:59:59"), - (False, "2005-01-23 21:59:59,000"), # proftpd - (False, "23-Jan-2005 21:59:59"), - (False, "23-Jan-2005 21:59:59.02"), - (False, "23-Jan-2005 21:59:59 +0100"), - (False, "23-01-2005 21:59:59"), - (True, "1106513999"), # Portsetry - (False, "01-23-2005 21:59:59.252"), # reported on f2b, causes Feb29 fix to break - (False, "@4000000041f4104f00000000"), # TAI64N - (False, "2005-01-23T20:59:59.252Z"), #ISO 8601 (UTC) - (False, "2005-01-23T15:59:59-05:00"), #ISO 8601 with TZ - (False, "2005-01-23T21:59:59"), #ISO 8601 no TZ, assume local - (True, "<01/23/05@21:59:59>"), - (True, "050123 21:59:59"), # MySQL - (True, "Jan-23-05 21:59:59"), # ASSP like - (False, "Jan 23, 2005 9:59:59 PM"), # Apache Tomcat - (True, "1106513999"), # Regular epoch - (True, "1106513999.000"), # Regular epoch with millisec - (False, "audit(1106513999.000:987)"), # SELinux + for anchored, bound, sdate, rdate in ( + (False, True, "Jan 23 21:59:59", None), + (False, False, "Sun Jan 23 21:59:59 2005", None), + (False, False, "Sun Jan 23 21:59:59", None), + (False, False, "Sun Jan 23 2005 21:59:59", None), + (False, True, "2005/01/23 21:59:59", None), + (False, True, "2005.01.23 21:59:59", None), + (False, True, "23/01/2005 21:59:59", None), + (False, True, "23/01/05 21:59:59", None), + (False, True, "23/Jan/2005:21:59:59", None), + (False, True, "23/Jan/2005:21:59:59 +0100", None), + (False, True, "01/23/2005:21:59:59", None), + (False, True, "2005-01-23 21:59:59", None), + (False, True, "2005-01-23 21:59:59,000", None), # proftpd + (False, True, "23-Jan-2005 21:59:59", None), + (False, True, "23-Jan-2005 21:59:59.02", None), + (False, True, "23-Jan-2005 21:59:59 +0100", None), + (False, True, "23-01-2005 21:59:59", None), + (True, True, "1106513999", None), # Portsetry + (False, True, "01-23-2005 21:59:59.252", None), # reported on f2b, causes Feb29 fix to break + (False, False, "@4000000041f4104f00000000", None), # TAI64N + (False, True, "2005-01-23T20:59:59.252Z", None), #ISO 8601 (UTC) + (False, True, "2005-01-23T15:59:59-05:00", None), #ISO 8601 with TZ + (False, True, "2005-01-23 21:59:59", None), #ISO 8601 no TZ, assume local + (False, True, "20050123T215959", None), #Short ISO + (False, True, "20050123 215959", None), #Short ISO + (True, True, "<01/23/05@21:59:59>", None), + (False, True, "050123 21:59:59", None), # MySQL + (True, True, "Jan-23-05 21:59:59", None), # ASSP like + (False, True, "Jan 23, 2005 9:59:59 PM", None), # Apache Tomcat + (True, True, "1106513999", None), # Regular epoch + (True, True, "1106513999.000", None), # Regular epoch with millisec + (True, True, "[1106513999.000]", "1106513999.000"), # epoch squared + (False, True, "audit(1106513999.000:987)", "1106513999.000"), # SELinux + ): + logSys.debug('== test %r', (anchored, bound, sdate)) + for should_match, prefix in ( + (True, ""), + (not anchored, "bogus-prefix "), + (False, "word-boundary") ): - for should_match, prefix in ((True, ""), - (not anchored, "bogus-prefix ")): + if rdate is None: rdate = sdate log = prefix + sdate + "[sshd] error: PAM: Authentication failure" - + # if not allowed boundary test: + if not bound and prefix == "word-boundary": continue + logSys.debug(' -- test %-5s for %r', should_match, log) # with getTime: logtime = self.__datedetector.getTime(log) if should_match: - self.assertNotEqual(logtime, None, "getTime retrieved nothing: failure for %s, anchored: %r, log: %s" % ( sdate, anchored, log)) + self.assertNotEqual(logtime, None, + "getTime retrieved nothing: failure for %s by prefix %r, anchored: %r, log: %s" % ( sdate, prefix, anchored, log)) ( logUnix, logMatch ) = logtime - self.assertEqual(logUnix, dateUnix, "getTime comparison failure for %s: \"%s\" is not \"%s\"" % (sdate, logUnix, dateUnix)) - if sdate.startswith('audit('): - # yes, special case, the group only matches the number - self.assertEqual(logMatch.group(), '1106513999.000') - else: - self.assertEqual(logMatch.group(), sdate) + self.assertEqual(logUnix, dateUnix, + "getTime comparison failure for %s: by prefix %r \"%s\" is not \"%s\"" % (sdate, prefix, logUnix, dateUnix)) + self.assertEqual(logMatch.group(), rdate) else: - self.assertEqual(logtime, None, "getTime should have not matched for %r Got: %s" % (sdate, logtime)) + self.assertEqual(logtime, None, + "getTime should have not matched for %r by prefix %r Got: %s" % (sdate, prefix, logtime)) # with getTime(matchTime) - this combination used in filter: - matchTime = self.__datedetector.matchTime(log) + (timeMatch, template) = matchTime = self.__datedetector.matchTime(log) logtime = self.__datedetector.getTime(log, matchTime) + logSys.debug(' -- found - %r', template.name if timeMatch else False) if should_match: - self.assertNotEqual(logtime, None, "getTime retrieved nothing: failure for %s, anchored: %r, log: %s" % ( sdate, anchored, log)) + self.assertNotEqual(logtime, None, + "getTime retrieved nothing: failure for %s by prefix %r, anchored: %r, log: %s" % ( sdate, prefix, anchored, log)) ( logUnix, logMatch ) = logtime - self.assertEqual(logUnix, dateUnix, "getTime comparison failure for %s: \"%s\" is not \"%s\"" % (sdate, logUnix, dateUnix)) - if sdate.startswith('audit('): - # yes, special case, the group only matches the number - self.assertEqual(logMatch.group(), '1106513999.000') - else: - self.assertEqual(logMatch.group(), sdate) + self.assertEqual(logUnix, dateUnix, + "getTime comparison failure for %s by prefix %r: \"%s\" is not \"%s\"" % (sdate, prefix, logUnix, dateUnix)) + self.assertEqual(logMatch.group(), rdate) else: - self.assertEqual(logtime, None, "getTime should have not matched for %r Got: %s" % (sdate, logtime)) + self.assertEqual(logtime, None, + "getTime should have not matched for %r by prefix %r Got: %s" % (sdate, prefix, logtime)) + logSys.debug(' -- OK') def testAllUniqueTemplateNames(self): self.assertRaises(ValueError, self.__datedetector.appendTemplate, diff --git a/fail2ban/tests/files/logs/freeswitch b/fail2ban/tests/files/logs/freeswitch index f151995c..2579e6f4 100644 --- a/fail2ban/tests/files/logs/freeswitch +++ b/fail2ban/tests/files/logs/freeswitch @@ -9,3 +9,8 @@ 2013-12-31 17:39:54.767815 [WARNING] sofia_reg.c:2531 Can't find user [1001@192.168.2.51] from 5.11.47.236 # failJSON: { "time": "2013-12-31T17:39:54", "match": true, "host": "185.24.234.141" } 2013-12-31 17:39:54.767815 [WARNING] sofia_reg.c:2531 Can't find user [100@192.168.2.51] from 185.24.234.141 + +# failJSON: { "time": "2016-09-25T18:57:58", "match": true, "host": "192.0.2.1", "desc": "Systemd dual time with prefix - 1st expr" } +2016-09-25T18:57:58.150982 www.srv.tld freeswitch[122921]: 2016-09-25 18:57:58.150982 [WARNING] sofia_reg.c:2889 Can't find user [201@::1] from 192.0.2.1 +# failJSON: { "time": "2016-09-25T18:57:58", "match": true, "host": "192.0.2.2", "desc": "Systemd dual time with prefix - 2nd expr" } +2016-09-25T18:57:58.150982 www.srv.tld freeswitch[122921]: 2016-09-25 18:57:58.150982 [WARNING] sofia_reg.c:1720 SIP auth failure (INVITE) on sofia profile 'sipinterface_1' for [9810972597751739@::1] from ip 192.0.2.2 \ No newline at end of file From 84fe55b99b8010c1951fea021a886a6cbbc4de7d Mon Sep 17 00:00:00 2001 From: sebres Date: Wed, 28 Sep 2016 21:17:42 +0200 Subject: [PATCH 02/19] [temp commit] 2nd try to optimize datedetector/datetemplate functionality (almost ready, needs fine tuning) --- fail2ban/client/fail2banregex.py | 40 +++-- fail2ban/server/datedetector.py | 105 ++++++------- fail2ban/server/datetemplate.py | 45 +++--- fail2ban/server/mytime.py | 20 ++- fail2ban/server/strptime.py | 51 +++++- fail2ban/tests/datedetectortestcase.py | 147 +++++++++++++++++- fail2ban/tests/fail2banregextestcase.py | 2 +- fail2ban/tests/files/logs/zzz-generic-example | 8 +- fail2ban/tests/filtertestcase.py | 6 +- fail2ban/tests/misctestcase.py | 89 ----------- fail2ban/tests/servertestcase.py | 2 +- fail2ban/tests/utils.py | 10 +- 12 files changed, 305 insertions(+), 220 deletions(-) diff --git a/fail2ban/client/fail2banregex.py b/fail2ban/client/fail2banregex.py index 4096ac4e..6680b097 100644 --- a/fail2ban/client/fail2banregex.py +++ b/fail2ban/client/fail2banregex.py @@ -122,15 +122,15 @@ Report bugs to https://github.com/fail2ban/fail2ban/issues p.add_options([ Option("-d", "--datepattern", help="set custom pattern used to match date/times"), - Option("-e", "--encoding", + Option("-e", "--encoding", default=PREFER_ENC, help="File encoding. Default: system locale"), - Option("-r", "--raw", action='store_true', + Option("-r", "--raw", action='store_true', default=False, help="Raw hosts, don't resolve dns"), Option("--usedns", action='store', default=None, help="DNS specified replacement of tags in regexp " "('yes' - matches all form of hosts, 'no' - IP addresses only)"), Option("-L", "--maxlines", type=int, default=0, - help="maxlines for multi-line regex"), + help="maxlines for multi-line regex."), Option("-m", "--journalmatch", help="journalctl style matches overriding filter file. " "\"systemd-journal\" only"), @@ -143,6 +143,8 @@ Report bugs to https://github.com/fail2ban/fail2ban/issues help="Increase verbosity"), Option("--verbosity", action="store", dest="verbose", type=int, help="Set numerical level of verbosity (0..4)"), + Option("--verbose-date", "--VD", action='store_true', + help="Verbose date patterns/regex in output"), Option("-D", "--debuggex", action='store_true', help="Produce debuggex.com urls for debugging there"), Option("--print-no-missed", action='store_true', @@ -215,14 +217,8 @@ class LineStats(object): class Fail2banRegex(object): def __init__(self, opts): - self._verbose = opts.verbose - self._debuggex = opts.debuggex - self._maxlines = 20 - self._print_no_missed = opts.print_no_missed - self._print_no_ignored = opts.print_no_ignored - self._print_all_matched = opts.print_all_matched - self._print_all_missed = opts.print_all_missed - self._print_all_ignored = opts.print_all_ignored + # set local protected memebers from given options: + self.__dict__.update(dict(('_'+o,v) for o,v in opts.__dict__.iteritems())) self._maxlines_set = False # so we allow to override maxlines in cmdline self._datepattern_set = False self._journalmatch = None @@ -236,23 +232,20 @@ class Fail2banRegex(object): if opts.maxlines: self.setMaxLines(opts.maxlines) + else: + self._maxlines = 20 if opts.journalmatch is not None: self.setJournalMatch(opts.journalmatch.split()) if opts.datepattern: self.setDatePattern(opts.datepattern) - if opts.encoding: - self.encoding = opts.encoding - else: - self.encoding = PREFER_ENC - self.raw = True if opts.raw else False if opts.usedns: self._filter.setUseDns(opts.usedns) def decode_line(self, line): - return FileContainer.decode_line('', self.encoding, line) + return FileContainer.decode_line('', self._encoding, line) def encode_line(self, line): - return line.encode(self.encoding, 'ignore') + return line.encode(self._encoding, 'ignore') def setDatePattern(self, pattern): if not self._datepattern_set: @@ -350,7 +343,7 @@ class Fail2banRegex(object): orgLineBuffer = self._filter._Filter__lineBuffer fullBuffer = len(orgLineBuffer) >= self._filter.getMaxLines() try: - line, ret = self._filter.processLine(line, date, checkAllRegex=True, returnRawHost=self.raw) + line, ret = self._filter.processLine(line, date, checkAllRegex=True, returnRawHost=self._raw) for match in ret: # Append True/False flag depending if line was matched by # more than one regex @@ -479,8 +472,11 @@ class Fail2banRegex(object): out = [] for template in self._filter.dateDetector.templates: if self._verbose or template.hits: - out.append("[%d] %s" % ( - template.hits, template.name)) + out.append("[%d] %s" % (template.hits, template.name)) + if self._verbose_date: + out.append(" # weight: %3s, pattern: %s" % ( + template.weight, getattr(template, 'pattern', ''),)) + out.append(" # regex: %s" % (getattr(template, 'regex', ''),)) pprint_list(out, "[# of hits] date format") output( "\nLines: %s" % self._line_stats, ) @@ -518,7 +514,7 @@ class Fail2banRegex(object): try: hdlr = open(cmd_log, 'rb') output( "Use log file : %s" % cmd_log ) - output( "Use encoding : %s" % self.encoding ) + output( "Use encoding : %s" % self._encoding ) test_lines = self.file_lines_gen(hdlr) except IOError as e: output( e ) diff --git a/fail2ban/server/datedetector.py b/fail2ban/server/datedetector.py index b1f97af7..d26743a3 100644 --- a/fail2ban/server/datedetector.py +++ b/fail2ban/server/datedetector.py @@ -60,16 +60,15 @@ class DateDetectorCache(object): # exact given template with word benin-end boundary: template = DatePatternRegex(template) # additional template, that prefers datetime at start of a line (safety+performance feature): - template2 = copy.copy(template) - if hasattr(template, 'pattern'): - regex = template.pattern - wordEnd = True - else: - regex = template.regex - wordEnd = False - template2.setRegex(regex, wordBegin='start', wordEnd=wordEnd) - if template2.name != template.name: - self.__templates.append(template2) + if 0 and hasattr(template, 'regex'): + template2 = copy.copy(template) + regex = getattr(template, 'pattern', template.regex) + template2.setRegex(regex, wordBegin='start', wordEnd=True) + if template2.name != template.name: + # increase weight of such templates, because they should be always + # preferred in template sorting process (bubble up): + template2.weight = 100 + self.__templates.append(template2) # add template: self.__templates.append(template) @@ -80,35 +79,35 @@ class DateDetectorCache(object): # 2005-01-23T21:59:59.981746, 2005-01-23 21:59:59 # simple date: 2005/01/23 21:59:59 # custom for syslog-ng 2006.12.21 06:43:20 - self._cacheTemplate("%Y(?P<_sep>[-/.])%m(?P=_sep)%d[T ]%H:%M:%S(?:[.,]%f)?(?:\s*%z)?") + self._cacheTemplate("%ExY(?P<_sep>[-/.])%m(?P=_sep)%d[T ]%H:%M:%S(?:[.,]%f)?(?:\s*%z)?") # 20050123T215959, 20050123 215959 - self._cacheTemplate("%Y%Em%Ed[T ]%EH%EM%ES(?:[.,]%f)?(?:\s*%z)?") + self._cacheTemplate("%ExY%Exm%Exd[T ]%ExH%ExM%ExS(?:[.,]%f)?(?:\s*%z)?") # asctime with optional day, subsecond and/or year: # Sun Jan 23 21:59:59.011 2005 # prefixed with optional time zone (monit): # PDT Apr 16 21:05:29 - self._cacheTemplate("(?:%z )?(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %Y)?") + self._cacheTemplate("(?:%z )?(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?") # asctime with optional day, subsecond and/or year coming after day # http://bugs.debian.org/798923 # Sun Jan 23 2005 21:59:59.011 - self._cacheTemplate("(?:%a )?%b %d %Y %H:%M:%S(?:\.%f)?") + self._cacheTemplate("(?:%a )?%b %d %ExY %H:%M:%S(?:\.%f)?") # simple date too (from x11vnc): 23/01/2005 21:59:59 # and with optional year given by 2 digits: 23/01/05 21:59:59 # (See http://bugs.debian.org/537610) # 17-07-2008 17:23:25 - self._cacheTemplate("%d(?P<_sep>[-/])%m(?P=_sep)(?:%Y|%y) %H:%M:%S") + self._cacheTemplate("%d(?P<_sep>[-/])%m(?P=_sep)(?:%ExY|%Exy) %H:%M:%S") # Apache format optional time zone: # [31/Oct/2006:09:22:55 -0000] # 26-Jul-2007 15:20:52 # named 26-Jul-2007 15:20:52.252 # roundcube 26-Jul-2007 15:20:52 +0200 - self._cacheTemplate("%d(?P<_sep>[-/])%b(?P=_sep)%Y[ :]?%H:%M:%S(?:\.%f)?(?: %z)?") + self._cacheTemplate("%d(?P<_sep>[-/])%b(?P=_sep)%ExY[ :]?%H:%M:%S(?:\.%f)?(?: %z)?") # CPanel 05/20/2008:01:57:39 - self._cacheTemplate("%m/%d/%Y:%H:%M:%S") + self._cacheTemplate("%m/%d/%ExY:%H:%M:%S") # 01-27-2012 16:22:44.252 # subseconds explicit to avoid possible %m<->%d confusion - # with previous ("%d-%m-%Y %H:%M:%S" by "%d(?P<_sep>[-/])%m(?P=_sep)(?:%Y|%y) %H:%M:%S") - self._cacheTemplate("%m-%d-%Y %H:%M:%S(?:\.%f)?") + # with previous ("%d-%m-%ExY %H:%M:%S" by "%d(?P<_sep>[-/])%m(?P=_sep)(?:%ExY|%Exy) %H:%M:%S") + self._cacheTemplate("%m-%d-%ExY %H:%M:%S(?:\.%f)?") # TAI64N self._cacheTemplate(DateTai64n()) # Epoch @@ -116,13 +115,13 @@ class DateDetectorCache(object): # Only time information in the log self._cacheTemplate("^%H:%M:%S") # <09/16/08@05:03:30> - self._cacheTemplate("^<%m/%d/%y@%H:%M:%S>") + self._cacheTemplate("^<%m/%d/%Exy@%H:%M:%S>") # MySQL: 130322 11:46:11 - self._cacheTemplate("%y%Em%Ed ?%H:%M:%S") + self._cacheTemplate("%Exy%Exm%Exd ?%H:%M:%S") # Apache Tomcat - self._cacheTemplate("%b %d, %Y %I:%M:%S %p") + self._cacheTemplate("%b %d, %ExY %I:%M:%S %p") # ASSP: Apr-27-13 02:33:06 - self._cacheTemplate("^%b-%d-%y %H:%M:%S") + self._cacheTemplate("^%b-%d-%Exy %H:%M:%S") class DateDetectorTemplate(object): @@ -218,14 +217,14 @@ class DateDetector(object): """ i = 0 with self.__lock: - for ddtemplate in self.__templates: - template = ddtemplate.template + for ddtempl in self.__templates: + template = ddtempl.template match = template.matchDate(line) - if not match is None: + if match is not None: if logSys.getEffectiveLevel() <= logLevel: logSys.log(logLevel, "Matched time template %s", template.name) - ddtemplate.hits += 1 - ddtemplate.lastUsed = time.time() + ddtempl.hits += 1 + ddtempl.lastUsed = time.time() # if not first - try to reorder current template (bubble up), they will be not sorted anymore: if i: self._reorderTemplate(i) @@ -254,32 +253,21 @@ class DateDetector(object): The Unix timestamp returned from the first successfully matched template or None if not found. """ - if timeMatch: - template = timeMatch[1] - if template is not None: - try: - date = template.getDate(line, timeMatch[0]) - if date is not None: - if logSys.getEffectiveLevel() <= logLevel: - logSys.log(logLevel, "Got time %f for %r using template %s", - date[0], date[1].group(), template.name) - return date - except ValueError: - return None - with self.__lock: - for ddtemplate in self.__templates: - template = ddtemplate.template - try: - date = template.getDate(line) - if date is None: - continue + # search match for all specified templates: + if timeMatch is None: + timeMatch = self.matchTime(line) + # convert: + template = timeMatch[1] + if template is not None: + try: + date = template.getDate(line, timeMatch[0]) + if date is not None: if logSys.getEffectiveLevel() <= logLevel: - logSys.log(logLevel, "Got time %f for %r using template %s", + logSys.log(logLevel, "Got time %f for %r using template %s", date[0], date[1].group(), template.name) return date - except ValueError: # pragma: no cover - pass - return None + except ValueError: + return None def _reorderTemplate(self, num): """Reorder template (bubble up) in template list if hits grows enough. @@ -291,16 +279,16 @@ class DateDetector(object): """ if num: templates = self.__templates - template = templates[num] + ddtempl = templates[num] ## current hits and time the template was long unused: - untime = template.lastUsed - self.__unusedTime - hits = template.hits + untime = ddtempl.lastUsed - self.__unusedTime + hits = ddtempl.hits * ddtempl.template.weight ## try to move faster (first 2 if it still unused, or half of part to current template position): - phits = 0 for pos in (0, 1, num // 2): phits = templates[pos].hits - if not phits: + if not phits: # if we've found an unused break + phits *= templates[pos].template.weight ## don't move too often (multiline logs resp. log's with different date patterns), ## if template not used too long, replace it also : if not phits or hits > phits + 5 or templates[pos].lastUsed < untime: @@ -308,8 +296,9 @@ class DateDetector(object): if hits <= phits and templates[pos].lastUsed > untime: pos = num-1 ## if still smaller and template at position used, don't move: - if hits < templates[pos].hits and templates[pos].lastUsed > untime: + phits = templates[pos].hits * templates[pos].template.weight + if hits < phits and templates[pos].lastUsed > untime: return - templates[pos], templates[num] = template, templates[pos] + templates[pos], templates[num] = ddtempl, templates[pos] diff --git a/fail2ban/server/datetemplate.py b/fail2ban/server/datetemplate.py index 5fcc16a4..6548e5ee 100644 --- a/fail2ban/server/datetemplate.py +++ b/fail2ban/server/datetemplate.py @@ -32,6 +32,9 @@ from ..helpers import getLogger logSys = getLogger(__name__) +RE_NO_WRD_BOUND_BEG = re.compile(r'^(?:\^|\*\*|\(\?:\^)') +RE_NO_WRD_BOUND_END = re.compile(r'(? 1 else "".join(exprset) + #todo: implement literal time zone support like CET, PST, PDT, etc (via pytz): #timeRE['z'] = r"%s?(?PZ|[+-]\d{2}(?::?[0-5]\d)?|[A-Z]{3})?" % timeRE['Z'] timeRE['z'] = r"(?PZ|[+-]\d{2}(?::?[0-5]\d)?)" -# Extend build-in TimeRE with some exact (two-digit) patterns: -timeRE['Ed'] = r"(?P3[0-1]|[1-2]\d|0[1-9])" -timeRE['Em'] = r"(?P1[0-2]|0[1-9])" -timeRE['EH'] = r"(?P2[0-3]|[0-1]\d)" -timeRE['EM'] = r"(?P[0-5]\d)" -timeRE['ES'] = r"(?P6[0-1]|[0-5]\d)" +# Extend build-in TimeRE with some exact patterns +# exact two-digit patterns: +timeRE['Exd'] = r"(?P3[0-1]|[1-2]\d|0[1-9])" +timeRE['Exm'] = r"(?P1[0-2]|0[1-9])" +timeRE['ExH'] = r"(?P2[0-3]|[0-1]\d)" +timeRE['ExM'] = r"(?P[0-5]\d)" +timeRE['ExS'] = r"(?P6[0-1]|[0-5]\d)" +# more precise year patterns, within same century of last year and +# the next 3 years (for possible long uptime of fail2ban); thereby +# respect possible run in the test-cases (alternate date used there): +timeRE['ExY'] = r"(?P%s\d)" % _getYearCentRE(cent=(0,3), distance=3) +timeRE['Exy'] = r"(?P%s\d)" % _getYearCentRE(cent=(2,3), distance=3) +# Special pattern "start of the line", analogous to `wordBegin='start'` of default templates: +timeRE['ExLB'] = r"(?:^|(?<=^\W)|(?<=^\W{2}))" def getTimePatternRE(): keys = timeRE.keys() - return (r"%%(%%|%s|[%s])" % ( + patt = (r"%%(%%|%s|[%s])" % ( "|".join([k for k in keys if len(k) > 1]), "".join([k for k in keys if len(k) == 1]), )) - + names = { + 'a': "DAY", 'A': "DAYNAME", 'b': "MON", 'B': "MONTH", 'd': "Day", + 'H': "24hour", 'I': "12hour", 'j': "Yearday", 'm': "Month", + 'M': "Minute", 'p': "AMPM", 'S': "Second", 'U': "Yearweek", + 'w': "Weekday", 'W': "Yearweek", 'y': 'Year2', 'Y': "Year", '%': "%", + 'z': "Zone offset", 'f': "Microseconds", 'Z': "Zone name", + 'ExLB': '{^LN-BEG}', + } + for key in set(keys) - set(names): # may not have them all... + if key.startswith('Ex'): + kn = names.get(key[2:]) + if kn: + names[key] = "Ex" + kn + continue + names[key] = "%%%s" % key + return (patt, names) def reGroupDictStrptime(found_dict): """Return time from dictionary of strptime fields diff --git a/fail2ban/tests/datedetectortestcase.py b/fail2ban/tests/datedetectortestcase.py index 013e1e85..109a275f 100644 --- a/fail2ban/tests/datedetectortestcase.py +++ b/fail2ban/tests/datedetectortestcase.py @@ -30,7 +30,7 @@ import datetime from ..server.datedetector import DateDetector from ..server import datedetector -from ..server.datetemplate import DateTemplate +from ..server.datetemplate import DatePatternRegex, DateTemplate from .utils import setUpMyTime, tearDownMyTime, LogCaptureTestCase from ..helpers import getLogger @@ -89,6 +89,10 @@ class DateDetectorTest(LogCaptureTestCase): """ dateUnix = 1106513999.0 + # anchored - matching expression (pattern) is anchored + # bound - pattern can be tested using word boundary (e.g. False if contains in front some optional part) + # sdate - date string used in test log-line + # rdate - if specified, the result match, which differs from sdate for anchored, bound, sdate, rdate in ( (False, True, "Jan 23 21:59:59", None), (False, False, "Sun Jan 23 21:59:59 2005", None), @@ -113,15 +117,15 @@ class DateDetectorTest(LogCaptureTestCase): (False, True, "2005-01-23T20:59:59.252Z", None), #ISO 8601 (UTC) (False, True, "2005-01-23T15:59:59-05:00", None), #ISO 8601 with TZ (False, True, "2005-01-23 21:59:59", None), #ISO 8601 no TZ, assume local - (False, True, "20050123T215959", None), #Short ISO - (False, True, "20050123 215959", None), #Short ISO + (False, True, "20050123T215959", None), #Short ISO with T + (False, True, "20050123 215959", None), #Short ISO with space (True, True, "<01/23/05@21:59:59>", None), (False, True, "050123 21:59:59", None), # MySQL (True, True, "Jan-23-05 21:59:59", None), # ASSP like (False, True, "Jan 23, 2005 9:59:59 PM", None), # Apache Tomcat (True, True, "1106513999", None), # Regular epoch (True, True, "1106513999.000", None), # Regular epoch with millisec - (True, True, "[1106513999.000]", "1106513999.000"), # epoch squared + (True, True, "[1106513999.000]", "1106513999.000"), # epoch squared (brackets are not in match) (False, True, "audit(1106513999.000:987)", "1106513999.000"), # SELinux ): logSys.debug('== test %r', (anchored, bound, sdate)) @@ -195,6 +199,141 @@ class DateDetectorTest(LogCaptureTestCase): self.assertEqual(t.matchDate('aaaac').group(), 'aaaac') +iso8601 = DatePatternRegex("%Y-%m-%d[T ]%H:%M:%S(?:\.%f)?%z") + +class CustomDateFormatsTest(unittest.TestCase): + + def testIso8601(self): + date = datetime.datetime.utcfromtimestamp( + iso8601.getDate("2007-01-25T12:00:00Z")[0]) + self.assertEqual( + date, + datetime.datetime(2007, 1, 25, 12, 0)) + self.assertRaises(TypeError, iso8601.getDate, None) + self.assertRaises(TypeError, iso8601.getDate, date) + + self.assertEqual(iso8601.getDate(""), None) + self.assertEqual(iso8601.getDate("Z"), None) + + self.assertEqual(iso8601.getDate("2007-01-01T120:00:00Z"), None) + self.assertEqual(iso8601.getDate("2007-13-01T12:00:00Z"), None) + date = datetime.datetime.utcfromtimestamp( + iso8601.getDate("2007-01-25T12:00:00+0400")[0]) + self.assertEqual( + date, + datetime.datetime(2007, 1, 25, 8, 0)) + date = datetime.datetime.utcfromtimestamp( + iso8601.getDate("2007-01-25T12:00:00+04:00")[0]) + self.assertEqual( + date, + datetime.datetime(2007, 1, 25, 8, 0)) + date = datetime.datetime.utcfromtimestamp( + iso8601.getDate("2007-01-25T12:00:00-0400")[0]) + self.assertEqual( + date, + datetime.datetime(2007, 1, 25, 16, 0)) + date = datetime.datetime.utcfromtimestamp( + iso8601.getDate("2007-01-25T12:00:00-04")[0]) + self.assertEqual( + date, + datetime.datetime(2007, 1, 25, 16, 0)) + + def testAmbiguousDatePattern(self): + defDD = DateDetector() + defDD.addDefaultTemplate() + for (matched, dp, line) in ( + # positive case: + ('Jan 23 21:59:59', None, 'Test failure Jan 23 21:59:59 for 192.0.2.1'), + # ambiguous "unbound" patterns (missed): + (False, None, 'Test failure TestJan 23 21:59:59.011 2015 for 192.0.2.1'), + (False, None, 'Test failure Jan 23 21:59:59123456789 for 192.0.2.1'), + # ambiguous "no optional year" patterns (matched): + ('Aug 8 11:25:50', None, 'Aug 8 11:25:50 20030f2329b8 Authentication failed from 192.0.2.1'), + ('Aug 8 11:25:50', None, '[Aug 8 11:25:50] 20030f2329b8 Authentication failed from 192.0.2.1'), + ('Aug 8 11:25:50 2014', None, 'Aug 8 11:25:50 2014 20030f2329b8 Authentication failed from 192.0.2.1'), + # direct specified patterns: + ('20:00:00 01.02.2003', r'%H:%M:%S %d.%m.%Y$', '192.0.2.1 at 20:00:00 01.02.2003'), + ('[20:00:00 01.02.2003]', r'\[%H:%M:%S %d.%m.%Y\]', '192.0.2.1[20:00:00 01.02.2003]'), + ('[20:00:00 01.02.2003]', r'\[%H:%M:%S %d.%m.%Y\]', '[20:00:00 01.02.2003]192.0.2.1'), + ('[20:00:00 01.02.2003]', r'\[%H:%M:%S %d.%m.%Y\]$', '192.0.2.1[20:00:00 01.02.2003]'), + ('[20:00:00 01.02.2003]', r'^\[%H:%M:%S %d.%m.%Y\]', '[20:00:00 01.02.2003]192.0.2.1'), + ('[17/Jun/2011 17:00:45]', r'^\[%d/%b/%Y %H:%M:%S\]', '[17/Jun/2011 17:00:45] Attempt, IP address 192.0.2.1'), + ('[17/Jun/2011 17:00:45]', r'\[%d/%b/%Y %H:%M:%S\]', 'Attempt [17/Jun/2011 17:00:45] IP address 192.0.2.1'), + ('[17/Jun/2011 17:00:45]', r'\[%d/%b/%Y %H:%M:%S\]', 'Attempt IP address 192.0.2.1, date: [17/Jun/2011 17:00:45]'), + # direct specified patterns (begin/end, missed): + (False, r'%H:%M:%S %d.%m.%Y', '192.0.2.1x20:00:00 01.02.2003'), + (False, r'%H:%M:%S %d.%m.%Y', '20:00:00 01.02.2003x192.0.2.1'), + # direct specified unbound patterns (no begin/end boundary): + ('20:00:00 01.02.2003', r'**%H:%M:%S %d.%m.%Y**', '192.0.2.1x20:00:00 01.02.2003'), + ('20:00:00 01.02.2003', r'**%H:%M:%S %d.%m.%Y**', '20:00:00 01.02.2003x192.0.2.1'), + # pattern enclosed with stars (in comparison to example above): + ('*20:00:00 01.02.2003*', r'\**%H:%M:%S %d.%m.%Y\**', 'test*20:00:00 01.02.2003*test'), + # direct specified patterns (begin/end, matched): + ('20:00:00 01.02.2003', r'%H:%M:%S %d.%m.%Y', '192.0.2.1 20:00:00 01.02.2003'), + ('20:00:00 01.02.2003', r'%H:%M:%S %d.%m.%Y', '20:00:00 01.02.2003 192.0.2.1'), + # wrong year in 1st date, so failed by convert using not precise year (filter used last known date), + # in the 2nd and 3th tests (with precise year) it should find correct the 2nd date: + (None, r'%Y-%Exm-%Exd %ExH:%ExM:%ExS', "0000-12-30 00:00:00 - 2003-12-30 00:00:00"), + ('2003-12-30 00:00:00', r'%ExY-%Exm-%Exd %ExH:%ExM:%ExS', "0000-12-30 00:00:00 - 2003-12-30 00:00:00"), + ('2003-12-30 00:00:00', None, "0000-12-30 00:00:00 - 2003-12-30 00:00:00"), + # wrong date recognized short month/day (unbounded date pattern without separator between parts), + # in the 2nd and 3th tests (with precise month and day) it should find correct the 2nd date: + ('200333 010203', r'%Y%m%d %H%M%S', "text:200333 010203 | date:20031230 010203"), + ('20031230 010203', r'%ExY%Exm%Exd %ExH%ExM%ExS', "text:200333 010203 | date:20031230 010203"), + ('20031230 010203', None, "text:200333 010203 | date:20031230 010203"), + # Explicit bound in start of the line using %ExLB key, + # (negative) in the 1st case without line begin boundary - wrong date may be found, + # (positive) in the 2nd case with line begin boundary - unexpected date / log line (not found) + # (positive) and in 3th case with line begin boundary - find the correct date + ("20030101 000000", "%ExY%Exm%Exd %ExH%ExM%ExS", "00001230 010203 - 20030101 000000"), + (None, "%ExLB%ExY%Exm%Exd %ExH%ExM%ExS", "00001230 010203 - 20030101 000000"), + ("20031230 010203", "%ExLB%ExY%Exm%Exd %ExH%ExM%ExS", "20031230 010203 - 20030101 000000"), + # Explicit bound in start of the line using %ExLB key, + # up to 2 non-alphanumeric chars front, ** - no word boundary on the right + ("20031230010203", "%ExLB%ExY%Exm%Exd%ExH%ExM%ExS**", "2003123001020320030101000000"), + ("20031230010203", "%ExLB%ExY%Exm%Exd%ExH%ExM%ExS**", "#2003123001020320030101000000"), + ("20031230010203", "%ExLB%ExY%Exm%Exd%ExH%ExM%ExS**", "##2003123001020320030101000000"), + ("20031230010203", "%ExLB%ExY%Exm%Exd%ExH%ExM%ExS", "[20031230010203]20030101000000"), + ): + logSys.debug('== test: %r', (matched, dp, line)) + if dp is None: + dd = defDD + else: + dp = DatePatternRegex(dp) + dd = DateDetector() + dd.appendTemplate(dp) + date = dd.getTime(line) + if matched: + self.assertTrue(date) + self.assertEqual(matched, date[1].group()) + else: + self.assertEqual(date, None) + + # def testAmbiguousUsingOrderedTemplates(self): + # defDD = DateDetector() + # defDD.addDefaultTemplate() + # for (matched, dp, line) in ( + # # wrong date recognized short month/day (unbounded date pattern without separator), + # # in the 2nd and 3th tests (with precise month and day) it should find correct the 2nd date: + # ('200333 010203', r'%Y%m%d %H%M%S', "text:200333 010203 | date:20031230 010203"), + # ('20031230 010203', r'%ExY%Exm%Exd %ExH%ExM%ExS', "text:200333 010203 | date:20031230 010203"), + # ('20031230 010203', None, "text:200333 010203 | date:20031230 010203"), + # ): + # logSys.debug('== test: %r', (matched, dp, line)) + # if dp is None: + # dd = defDD + # else: + # dp = DatePatternRegex(dp) + # dd = DateDetector() + # dd.appendTemplate(dp) + # date = dd.getTime(line) + # if matched: + # self.assertTrue(date) + # self.assertEqual(matched, date[1].group()) + # else: + # self.assertEqual(date, None) + + # def testDefaultTempate(self): # self.__datedetector.setDefaultRegex("^\S{3}\s{1,2}\d{1,2} \d{2}:\d{2}:\d{2}") # self.__datedetector.setDefaultPattern("%b %d %H:%M:%S") diff --git a/fail2ban/tests/fail2banregextestcase.py b/fail2ban/tests/fail2banregextestcase.py index 4445fe6b..c62377a9 100644 --- a/fail2ban/tests/fail2banregextestcase.py +++ b/fail2ban/tests/fail2banregextestcase.py @@ -178,7 +178,7 @@ class Fail2banRegexTest(LogCaptureTestCase): def testVerbose(self): (opts, args, fail2banRegex) = _Fail2banRegex( - "--verbose", "--print-no-missed", + "--verbose", "--verbose-date", "--print-no-missed", Fail2banRegexTest.FILENAME_02, Fail2banRegexTest.RE_00 ) diff --git a/fail2ban/tests/files/logs/zzz-generic-example b/fail2ban/tests/files/logs/zzz-generic-example index 2044c387..51d3974c 100644 --- a/fail2ban/tests/files/logs/zzz-generic-example +++ b/fail2ban/tests/files/logs/zzz-generic-example @@ -30,8 +30,8 @@ Jun 21 16:55:02 machine kernel: [ 970.699396] @vserver_demo test- # failJSON: { "time": "2005-06-21T16:55:03", "match": true , "host": "192.0.2.3" } [Jun 21 16:55:03] machine kernel: [ 970.699396] @vserver_demo test-demo(pam_unix)[13709] [ID 255 test] F2B: failure from 192.0.2.3 -# -- wrong time direct in journal-line (used last known date): -# failJSON: { "time": "2005-06-21T16:55:03", "match": true , "host": "192.0.2.1" } +# -- wrong time direct in journal-line (using precise year pattern): +# failJSON: { "match": false} 0000-12-30 00:00:00 server test-demo[47831]: F2B: failure from 192.0.2.1 # -- wrong time after newline in message (plist without escaped newlines): # failJSON: { "match": false } @@ -42,8 +42,8 @@ Jun 22 20:37:04 server test-demo[402]: writeToStorage plist={ applicationDate = "0000-12-30 00:00:00 +0000"; # failJSON: { "match": false } } -# -- wrong time direct in journal-line (used last known date): -# failJSON: { "time": "2005-06-22T20:37:04", "match": true , "host": "192.0.2.2" } +# -- wrong time direct in journal-line (using precise year pattern): +# failJSON: { "match": false} 0000-12-30 00:00:00 server test-demo[47831]: F2B: failure from 192.0.2.2 # failJSON: { "time": "2005-06-21T16:56:02", "match": true , "host": "192.0.2.250" } diff --git a/fail2ban/tests/filtertestcase.py b/fail2ban/tests/filtertestcase.py index 2b57ce47..6c1a637c 100644 --- a/fail2ban/tests/filtertestcase.py +++ b/fail2ban/tests/filtertestcase.py @@ -283,10 +283,10 @@ class BasicFilter(unittest.TestCase): def testGetSetDatePattern(self): self.assertEqual(self.filter.getDatePattern(), (None, "Default Detectors")) - self.filter.setDatePattern("^%Y-%m-%d-%H%M%S.%f %z") + self.filter.setDatePattern("^%Y-%m-%d-%H%M%S.%f %z **") self.assertEqual(self.filter.getDatePattern(), - ("^%Y-%m-%d-%H%M%S.%f %z", - "^Year-Month-Day-24hourMinuteSecond.Microseconds Zone offset")) + ("^%Y-%m-%d-%H%M%S.%f %z **", + "^Year-Month-Day-24hourMinuteSecond.Microseconds Zone offset **")) def testAssertWrongTime(self): self.assertRaises(AssertionError, diff --git a/fail2ban/tests/misctestcase.py b/fail2ban/tests/misctestcase.py index 450904d5..908e4f6c 100644 --- a/fail2ban/tests/misctestcase.py +++ b/fail2ban/tests/misctestcase.py @@ -23,13 +23,11 @@ __license__ = "GPL" import logging import os -import re import sys import unittest import tempfile import shutil import fnmatch -import datetime from glob import glob from StringIO import StringIO @@ -37,8 +35,6 @@ from utils import LogCaptureTestCase, logSys as DefLogSys from ..helpers import formatExceptionInfo, mbasename, TraceBack, FormatterWithTraceBack, getLogger, uni_decode from ..helpers import splitwords -from ..server.datedetector import DateDetector -from ..server.datetemplate import DatePatternRegex from ..server.mytime import MyTime @@ -320,91 +316,6 @@ class TestsUtilsTest(LogCaptureTestCase): self.assertRaisesRegexp(Exception, 'not all arguments converted', lambda: logSys.debug('test', 1, 2, 3)) -iso8601 = DatePatternRegex("%Y-%m-%d[T ]%H:%M:%S(?:\.%f)?%z") - - -class CustomDateFormatsTest(unittest.TestCase): - - def testIso8601(self): - date = datetime.datetime.utcfromtimestamp( - iso8601.getDate("2007-01-25T12:00:00Z")[0]) - self.assertEqual( - date, - datetime.datetime(2007, 1, 25, 12, 0)) - self.assertRaises(TypeError, iso8601.getDate, None) - self.assertRaises(TypeError, iso8601.getDate, date) - - self.assertEqual(iso8601.getDate(""), None) - self.assertEqual(iso8601.getDate("Z"), None) - - self.assertEqual(iso8601.getDate("2007-01-01T120:00:00Z"), None) - self.assertEqual(iso8601.getDate("2007-13-01T12:00:00Z"), None) - date = datetime.datetime.utcfromtimestamp( - iso8601.getDate("2007-01-25T12:00:00+0400")[0]) - self.assertEqual( - date, - datetime.datetime(2007, 1, 25, 8, 0)) - date = datetime.datetime.utcfromtimestamp( - iso8601.getDate("2007-01-25T12:00:00+04:00")[0]) - self.assertEqual( - date, - datetime.datetime(2007, 1, 25, 8, 0)) - date = datetime.datetime.utcfromtimestamp( - iso8601.getDate("2007-01-25T12:00:00-0400")[0]) - self.assertEqual( - date, - datetime.datetime(2007, 1, 25, 16, 0)) - date = datetime.datetime.utcfromtimestamp( - iso8601.getDate("2007-01-25T12:00:00-04")[0]) - self.assertEqual( - date, - datetime.datetime(2007, 1, 25, 16, 0)) - - def testAmbiguousDatePattern(self): - defDD = DateDetector() - defDD.addDefaultTemplate() - logSys = DefLogSys - for (matched, dp, line) in ( - # positive case: - ('Jan 23 21:59:59', None, 'Test failure Jan 23 21:59:59 for 192.0.2.1'), - # ambiguous "unbound" patterns (missed): - (False, None, 'Test failure TestJan 23 21:59:59.011 2015 for 192.0.2.1'), - (False, None, 'Test failure Jan 23 21:59:59123456789 for 192.0.2.1'), - # ambiguous "no optional year" patterns (matched): - ('Aug 8 11:25:50', None, 'Aug 8 11:25:50 14430f2329b8 Authentication failed from 192.0.2.1'), - ('Aug 8 11:25:50', None, '[Aug 8 11:25:50] 14430f2329b8 Authentication failed from 192.0.2.1'), - ('Aug 8 11:25:50 2014', None, 'Aug 8 11:25:50 2014 14430f2329b8 Authentication failed from 192.0.2.1'), - # direct specified patterns: - ('20:00:00 01.02.2003', r'%H:%M:%S %d.%m.%Y$', '192.0.2.1 at 20:00:00 01.02.2003'), - ('[20:00:00 01.02.2003]', r'\[%H:%M:%S %d.%m.%Y\]', '192.0.2.1[20:00:00 01.02.2003]'), - ('[20:00:00 01.02.2003]', r'\[%H:%M:%S %d.%m.%Y\]', '[20:00:00 01.02.2003]192.0.2.1'), - ('[20:00:00 01.02.2003]', r'\[%H:%M:%S %d.%m.%Y\]$', '192.0.2.1[20:00:00 01.02.2003]'), - ('[20:00:00 01.02.2003]', r'^\[%H:%M:%S %d.%m.%Y\]', '[20:00:00 01.02.2003]192.0.2.1'), - ('[17/Jun/2011 17:00:45]', r'^\[%d/%b/%Y %H:%M:%S\]', '[17/Jun/2011 17:00:45] Attempt, IP address 192.0.2.1'), - ('[17/Jun/2011 17:00:45]', r'\[%d/%b/%Y %H:%M:%S\]', 'Attempt [17/Jun/2011 17:00:45] IP address 192.0.2.1'), - ('[17/Jun/2011 17:00:45]', r'\[%d/%b/%Y %H:%M:%S\]', 'Attempt IP address 192.0.2.1, date: [17/Jun/2011 17:00:45]'), - # direct specified patterns (begin/end, missed): - (False, r'%H:%M:%S %d.%m.%Y', '192.0.2.1x20:00:00 01.02.2003'), - (False, r'%H:%M:%S %d.%m.%Y', '20:00:00 01.02.2003x192.0.2.1'), - # direct specified patterns (begin/end, matched): - ('20:00:00 01.02.2003', r'%H:%M:%S %d.%m.%Y', '192.0.2.1 20:00:00 01.02.2003'), - ('20:00:00 01.02.2003', r'%H:%M:%S %d.%m.%Y', '20:00:00 01.02.2003 192.0.2.1'), - ): - logSys.debug('== test: %r', (matched, dp, line)) - if dp is None: - dd = defDD - else: - dp = DatePatternRegex(dp) - dd = DateDetector() - dd.appendTemplate(dp) - date = dd.getTime(line) - if matched: - self.assertTrue(date) - self.assertEqual(matched, date[1].group()) - else: - self.assertEqual(date, None) - - class MyTimeTest(unittest.TestCase): def testStr2Seconds(self): diff --git a/fail2ban/tests/servertestcase.py b/fail2ban/tests/servertestcase.py index 56c85e94..d4320257 100644 --- a/fail2ban/tests/servertestcase.py +++ b/fail2ban/tests/servertestcase.py @@ -298,7 +298,7 @@ class Transmitter(TransmitterBase): def testDatePattern(self): self.setGetTest("datepattern", "%%%Y%m%d%H%M%S", - ("%%%Y%m%d%H%M%S", "%YearMonthDay24hourMinuteSecond"), + ("%%%Y%m%d%H%M%S", "{*WD-BEG}%YearMonthDay24hourMinuteSecond{*WD-END}"), jail=self.jailName) self.setGetTest( "datepattern", "Epoch", (None, "Epoch"), jail=self.jailName) diff --git a/fail2ban/tests/utils.py b/fail2ban/tests/utils.py index f60dfd1f..22079456 100644 --- a/fail2ban/tests/utils.py +++ b/fail2ban/tests/utils.py @@ -48,6 +48,8 @@ from ..version import version logSys = getLogger(__name__) +TEST_NOW = 1124013600 + CONFIG_DIR = os.environ.get('FAIL2BAN_CONFIG_DIR', None) if not CONFIG_DIR: @@ -257,6 +259,10 @@ def initTests(opts): def F2B_SkipIfNoNetwork(): raise unittest.SkipTest('Skip test because of "--no-network"') unittest.F2B.SkipIfNoNetwork = F2B_SkipIfNoNetwork + + # set alternate now for time related test cases: + MyTime.setAlternateNow(TEST_NOW) + # precache all invalid ip's (TEST-NET-1, ..., TEST-NET-3 according to RFC 5737): c = DNSUtils.CACHE_ipToName for i in xrange(255): @@ -289,7 +295,7 @@ def setUpMyTime(): # yoh: we need to adjust TZ to match the one used by Cyril so all the timestamps match os.environ['TZ'] = 'Europe/Zurich' time.tzset() - MyTime.setTime(1124013600) + MyTime.setTime(TEST_NOW) def tearDownMyTime(): @@ -384,7 +390,6 @@ def gatherTests(regexps=None, opts=None): tests.addTest(unittest.makeSuite(misctestcase.HelpersTest)) tests.addTest(unittest.makeSuite(misctestcase.SetupTest)) tests.addTest(unittest.makeSuite(misctestcase.TestsUtilsTest)) - tests.addTest(unittest.makeSuite(misctestcase.CustomDateFormatsTest)) tests.addTest(unittest.makeSuite(misctestcase.MyTimeTest)) # Database tests.addTest(unittest.makeSuite(databasetestcase.DatabaseTest)) @@ -404,6 +409,7 @@ def gatherTests(regexps=None, opts=None): # DateDetector tests.addTest(unittest.makeSuite(datedetectortestcase.DateDetectorTest)) + tests.addTest(unittest.makeSuite(datedetectortestcase.CustomDateFormatsTest)) # Filter Regex tests with sample logs tests.addTest(unittest.makeSuite(samplestestcase.FilterSamplesRegex)) From 75a5440acf96c1b3fda37eef9b86de92ee3b1c17 Mon Sep 17 00:00:00 2001 From: sebres Date: Fri, 30 Sep 2016 20:37:02 +0200 Subject: [PATCH 03/19] extends date detector template with distance (position of match in log-line), to prevent grave collision using (re)ordered template list (e.g. find-spot of wrong date-match inside foreign input, misleading date patterns by ambiguous formats, etc.); By change of the distance (e.g. another format found), the pattern with smallest distance will be always preferred now. To speedup (template lookup) resp. minimize of list reorder counts, the distance will be used as divider factor of the template weight by the templates comparison. --- fail2ban/server/datedetector.py | 81 ++++++++++++++++++-------- fail2ban/tests/datedetectortestcase.py | 47 ++++++++------- 2 files changed, 81 insertions(+), 47 deletions(-) diff --git a/fail2ban/server/datedetector.py b/fail2ban/server/datedetector.py index d26743a3..63ae3692 100644 --- a/fail2ban/server/datedetector.py +++ b/fail2ban/server/datedetector.py @@ -129,11 +129,13 @@ class DateDetectorTemplate(object): Prevents collectively usage of hits/lastUsed in cached templates """ - __slots__ = ('template', 'hits', 'lastUsed') + __slots__ = ('template', 'hits', 'lastUsed', 'distance') def __init__(self, template): self.template = template self.hits = 0 self.lastUsed = 0 + # the last distance to date-match within the log file: + self.distance = 0x7fffffff def __getattr__(self, name): """ Returns attribute of template (called for parameters not in slots) @@ -156,6 +158,8 @@ class DateDetector(object): self.__known_names = set() # time the template was long unused (currently 300 == 5m): self.__unusedTime = 300 + # first free place: + self.__firstUnused = 0 def _appendTemplate(self, template): name = template.name @@ -215,22 +219,52 @@ class DateDetector(object): The regex match returned from the first successfully matched template. """ + match = None i = 0 + found = None, 0x7fffffff, -1 with self.__lock: for ddtempl in self.__templates: template = ddtempl.template match = template.matchDate(line) if match is not None: + distance = max(1, match.start() + 1) if logSys.getEffectiveLevel() <= logLevel: - logSys.log(logLevel, "Matched time template %s", template.name) - ddtempl.hits += 1 - ddtempl.lastUsed = time.time() - # if not first - try to reorder current template (bubble up), they will be not sorted anymore: - if i: - self._reorderTemplate(i) - # return tuple with match and template reference used for parsing: - return (match, template) + logSys.log(logLevel, " matched time template #%r (at %r <= %r) %s", + i, distance, ddtempl.distance, template.name) + ## [grave] if distance changed, possible date-match was found somewhere + ## in body of message, so save this template, and search further: + if distance > ddtempl.distance and len(self.__templates) > 1: + if logSys.getEffectiveLevel() <= logLevel: + logSys.log(logLevel, " ** distance collision - pattern change, reserve") + ## shortest of both: + if distance < found[1]: + found = match, distance, i + ## search further: + match = None + i += 1 + continue + ## winner - stop search: + break i += 1 + # check other template was found (use this one with shortest distance): + if match is None and found[0]: + match, distance, i = found + ddtempl = self.__templates[i] + template = ddtempl.template + # we've winner, incr hits, set distance, usage, reorder, etc: + if match is not None: + ddtempl.hits += 1 + ddtempl.distance = distance + ddtempl.lastUsed = time.time() + if self.__firstUnused == i: + self.__firstUnused += 1 + # if not first - try to reorder current template (bubble up), they will be not sorted anymore: + if i: + logSys.log(logLevel, " -> reorder template #%r, hits: %r", i, ddtempl.hits) + self._reorderTemplate(i) + # return tuple with match and template reference used for parsing: + return (match, template) + # not found: return (None, None) @@ -263,7 +297,7 @@ class DateDetector(object): date = template.getDate(line, timeMatch[0]) if date is not None: if logSys.getEffectiveLevel() <= logLevel: - logSys.log(logLevel, "Got time %f for %r using template %s", + logSys.log(logLevel, " got time %f for %r using template %s", date[0], date[1].group(), template.name) return date except ValueError: @@ -282,23 +316,24 @@ class DateDetector(object): ddtempl = templates[num] ## current hits and time the template was long unused: untime = ddtempl.lastUsed - self.__unusedTime - hits = ddtempl.hits * ddtempl.template.weight - ## try to move faster (first 2 if it still unused, or half of part to current template position): - for pos in (0, 1, num // 2): - phits = templates[pos].hits - if not phits: # if we've found an unused - break - phits *= templates[pos].template.weight + weight = ddtempl.hits * ddtempl.template.weight / ddtempl.distance + ## try to move faster (first if unused available, or half of part to current template position): + pos = self.__firstUnused if self.__firstUnused < num else num // 2 + pweight = templates[pos].hits * templates[pos].template.weight / templates[pos].distance ## don't move too often (multiline logs resp. log's with different date patterns), ## if template not used too long, replace it also : - if not phits or hits > phits + 5 or templates[pos].lastUsed < untime: + logSys.log(logLevel, " -> compare template #%r & #%r, weight %r > %r", num, pos, weight, pweight) + if not pweight or weight > pweight + 5 or templates[pos].lastUsed < untime: ## if not larger (and target position recently used) - move slow (exact 1 position): - if hits <= phits and templates[pos].lastUsed > untime: + if weight <= pweight and templates[pos].lastUsed > untime: pos = num-1 ## if still smaller and template at position used, don't move: - phits = templates[pos].hits * templates[pos].template.weight - if hits < phits and templates[pos].lastUsed > untime: + pweight = templates[pos].hits * templates[pos].template.weight / templates[pos].distance + logSys.log(logLevel, " -> compare template #%r & #%r, weight %r > %r", num, pos, weight, pweight) + if weight < pweight and templates[pos].lastUsed > untime: return templates[pos], templates[num] = ddtempl, templates[pos] - - + logSys.log(logLevel, " -> moved template #%r -> #%r", num, pos) + ## correct first unused: + if pos == self.__firstUnused: + self.__firstUnused += 1 diff --git a/fail2ban/tests/datedetectortestcase.py b/fail2ban/tests/datedetectortestcase.py index 109a275f..9616edbb 100644 --- a/fail2ban/tests/datedetectortestcase.py +++ b/fail2ban/tests/datedetectortestcase.py @@ -198,6 +198,29 @@ class DateDetectorTest(LogCaptureTestCase): self.assertRaises(Exception, t.getDate, '') self.assertEqual(t.matchDate('aaaac').group(), 'aaaac') + def testAmbiguousInOrderedTemplates(self): + dd = DateDetector() + dd.addDefaultTemplate() + for (debit, line, cnt) in ( + ("2003-03-07 17:05:01", "some free text 2003-03-07 17:05:01 test ...", 15), + # distance collision detection (date from foreign input should not be found): + ("030324 0:04:00", "server mysqld[1000]: 030324 0:04:00 [Warning] Access denied ..." + " foreign-input just some free text 2003-03-07 17:05:01 test", 10), + # distance collision detection (first date should be found): + ("Sep 16 21:30:26", "server mysqld[1020]: Sep 16 21:30:26 server mysqld: 030916 21:30:26 [Warning] Access denied", 10), + # just to test sorting: + ("2005-10-07 06:09:42", "server mysqld[5906]: 2005-10-07 06:09:42 5907 [Warning] Access denied", 20), + ("2005-10-08T15:26:18.237955", "server mysqld[5906]: 2005-10-08T15:26:18.237955 6 [Note] Access denied", 20), + # date format changed again: + ("051009 10:05:30", "server mysqld[1000]: 051009 10:05:30 [Warning] Access denied ...", 20), + ): + logSys.debug('== test: %r', (debit, line, cnt)) + for i in range(cnt): + logSys.debug('Line: %s', line) + match, template = dd.matchTime(line) + self.assertTrue(match) + self.assertEqual(match.group(), debit) + iso8601 = DatePatternRegex("%Y-%m-%d[T ]%H:%M:%S(?:\.%f)?%z") @@ -309,30 +332,6 @@ class CustomDateFormatsTest(unittest.TestCase): else: self.assertEqual(date, None) - # def testAmbiguousUsingOrderedTemplates(self): - # defDD = DateDetector() - # defDD.addDefaultTemplate() - # for (matched, dp, line) in ( - # # wrong date recognized short month/day (unbounded date pattern without separator), - # # in the 2nd and 3th tests (with precise month and day) it should find correct the 2nd date: - # ('200333 010203', r'%Y%m%d %H%M%S', "text:200333 010203 | date:20031230 010203"), - # ('20031230 010203', r'%ExY%Exm%Exd %ExH%ExM%ExS', "text:200333 010203 | date:20031230 010203"), - # ('20031230 010203', None, "text:200333 010203 | date:20031230 010203"), - # ): - # logSys.debug('== test: %r', (matched, dp, line)) - # if dp is None: - # dd = defDD - # else: - # dp = DatePatternRegex(dp) - # dd = DateDetector() - # dd.appendTemplate(dp) - # date = dd.getTime(line) - # if matched: - # self.assertTrue(date) - # self.assertEqual(matched, date[1].group()) - # else: - # self.assertEqual(date, None) - # def testDefaultTempate(self): # self.__datedetector.setDefaultRegex("^\S{3}\s{1,2}\d{1,2} \d{2}:\d{2}:\d{2}") From b9033d004ed7e4179e82b44d582dafd95b443b67 Mon Sep 17 00:00:00 2001 From: sebres Date: Tue, 4 Oct 2016 17:25:49 +0200 Subject: [PATCH 04/19] amend distance collision check - always find template with shortest distance, also first time (test-case extended); datedetector property template without lock, initially placed start-anchored templates at begin of template list, small optimization of strptime --- fail2ban/server/datedetector.py | 39 +++++++---- fail2ban/server/datetemplate.py | 8 +-- fail2ban/server/strptime.py | 92 ++++++++++++++------------ fail2ban/tests/datedetectortestcase.py | 3 + 4 files changed, 79 insertions(+), 63 deletions(-) diff --git a/fail2ban/server/datedetector.py b/fail2ban/server/datedetector.py index 63ae3692..e2c14d05 100644 --- a/fail2ban/server/datedetector.py +++ b/fail2ban/server/datedetector.py @@ -46,6 +46,8 @@ class DateDetectorCache(object): def templates(self): """List of template instances managed by the detector. """ + if self.__templates: + return self.__templates with self.__lock: if self.__templates: return self.__templates @@ -60,21 +62,22 @@ class DateDetectorCache(object): # exact given template with word benin-end boundary: template = DatePatternRegex(template) # additional template, that prefers datetime at start of a line (safety+performance feature): - if 0 and hasattr(template, 'regex'): + if hasattr(template, 'regex'): template2 = copy.copy(template) regex = getattr(template, 'pattern', template.regex) template2.setRegex(regex, wordBegin='start', wordEnd=True) if template2.name != template.name: # increase weight of such templates, because they should be always # preferred in template sorting process (bubble up): - template2.weight = 100 - self.__templates.append(template2) + template2.weight = 100.0 + self.__tmpcache[0].append(template2) # add template: - self.__templates.append(template) + self.__tmpcache[1].append(template) def _addDefaultTemplate(self): """Add resp. cache Fail2Ban's default set of date templates. """ + self.__tmpcache = [], [] # ISO 8601, simple date, optional subsecond and timezone: # 2005-01-23T21:59:59.981746, 2005-01-23 21:59:59 # simple date: 2005/01/23 21:59:59 @@ -122,6 +125,8 @@ class DateDetectorCache(object): self._cacheTemplate("%b %d, %ExY %I:%M:%S %p") # ASSP: Apr-27-13 02:33:06 self._cacheTemplate("^%b-%d-%Exy %H:%M:%S") + self.__templates = self.__tmpcache[0] + self.__tmpcache[1] + del self.__tmpcache class DateDetectorTemplate(object): @@ -158,6 +163,8 @@ class DateDetector(object): self.__known_names = set() # time the template was long unused (currently 300 == 5m): self.__unusedTime = 300 + # last known distance: + self.__lastDistance = 0 # first free place: self.__firstUnused = 0 @@ -229,11 +236,14 @@ class DateDetector(object): if match is not None: distance = max(1, match.start() + 1) if logSys.getEffectiveLevel() <= logLevel: - logSys.log(logLevel, " matched time template #%r (at %r <= %r) %s", - i, distance, ddtempl.distance, template.name) + logSys.log(logLevel, " matched time template #%r (at %r <= %r, %r) %s", + i, distance, ddtempl.distance, self.__lastDistance, template.name) ## [grave] if distance changed, possible date-match was found somewhere ## in body of message, so save this template, and search further: - if distance > ddtempl.distance and len(self.__templates) > 1: + if ( + (distance > ddtempl.distance or distance > self.__lastDistance) and + len(self.__templates) > 1 + ): if logSys.getEffectiveLevel() <= logLevel: logSys.log(logLevel, " ** distance collision - pattern change, reserve") ## shortest of both: @@ -254,7 +264,7 @@ class DateDetector(object): # we've winner, incr hits, set distance, usage, reorder, etc: if match is not None: ddtempl.hits += 1 - ddtempl.distance = distance + self.__lastDistance = ddtempl.distance = distance ddtempl.lastUsed = time.time() if self.__firstUnused == i: self.__firstUnused += 1 @@ -322,18 +332,21 @@ class DateDetector(object): pweight = templates[pos].hits * templates[pos].template.weight / templates[pos].distance ## don't move too often (multiline logs resp. log's with different date patterns), ## if template not used too long, replace it also : - logSys.log(logLevel, " -> compare template #%r & #%r, weight %r > %r", num, pos, weight, pweight) - if not pweight or weight > pweight + 5 or templates[pos].lastUsed < untime: + logSys.log(logLevel, " -> compare template #%r & #%r, weight %r > %r, hits %r > %r", + num, pos, weight, pweight, ddtempl.hits, templates[pos].hits) + if not pweight or weight > pweight or templates[pos].lastUsed < untime: ## if not larger (and target position recently used) - move slow (exact 1 position): if weight <= pweight and templates[pos].lastUsed > untime: pos = num-1 ## if still smaller and template at position used, don't move: pweight = templates[pos].hits * templates[pos].template.weight / templates[pos].distance - logSys.log(logLevel, " -> compare template #%r & #%r, weight %r > %r", num, pos, weight, pweight) + logSys.log(logLevel, " -> compare template #%r & #%r, weight %r > %r, hits %r > %r", + num, pos, weight, pweight, ddtempl.hits, templates[pos].hits) if weight < pweight and templates[pos].lastUsed > untime: return - templates[pos], templates[num] = ddtempl, templates[pos] - logSys.log(logLevel, " -> moved template #%r -> #%r", num, pos) + del templates[num] + templates[pos:0] = [ddtempl] ## correct first unused: if pos == self.__firstUnused: self.__firstUnused += 1 + logSys.log(logLevel, " -> moved template #%r -> #%r", num, pos) diff --git a/fail2ban/server/datetemplate.py b/fail2ban/server/datetemplate.py index 6548e5ee..3d116b1e 100644 --- a/fail2ban/server/datetemplate.py +++ b/fail2ban/server/datetemplate.py @@ -50,7 +50,7 @@ class DateTemplate(object): def __init__(self): self.name = "" - self.weight = 1 + self.weight = 1.0 self._regex = "" self._cRegex = None @@ -232,11 +232,7 @@ class DatePatternRegex(DateTemplate): if not dateMatch: dateMatch = self.matchDate(line) if dateMatch: - groupdict = dict( - (key, value) - for key, value in dateMatch.groupdict().iteritems() - if value is not None) - return reGroupDictStrptime(groupdict), dateMatch + return reGroupDictStrptime(dateMatch.groupdict()), dateMatch class DateTai64n(DateTemplate): diff --git a/fail2ban/server/strptime.py b/fail2ban/server/strptime.py index 2012aae6..85045ed8 100644 --- a/fail2ban/server/strptime.py +++ b/fail2ban/server/strptime.py @@ -80,7 +80,7 @@ def getTimePatternRE(): names[key] = "%%%s" % key return (patt, names) -def reGroupDictStrptime(found_dict): +def reGroupDictStrptime(found_dict, msec=False): """Return time from dictionary of strptime fields This is tweaked from python built-in _strptime. @@ -109,14 +109,15 @@ def reGroupDictStrptime(found_dict): # weekday and julian defaulted to -1 so as to signal need to calculate # values weekday = julian = -1 - for group_key in found_dict.keys(): + for key, val in found_dict.iteritems(): + if val is None: continue # Directives not explicitly handled below: # c, x, X # handled by making out of other directives # U, W # worthless without day of the week - if group_key == 'y': - year = int(found_dict['y']) + if key == 'y': + year = int(val) # Open Group specification for strptime() states that a %y #value in the range of [00, 68] is in the century 2000, while #[69,99] is in the century 1900 @@ -124,20 +125,20 @@ def reGroupDictStrptime(found_dict): year += 2000 else: year += 1900 - elif group_key == 'Y': - year = int(found_dict['Y']) - elif group_key == 'm': - month = int(found_dict['m']) - elif group_key == 'B': - month = locale_time.f_month.index(found_dict['B'].lower()) - elif group_key == 'b': - month = locale_time.a_month.index(found_dict['b'].lower()) - elif group_key == 'd': - day = int(found_dict['d']) - elif group_key == 'H': - hour = int(found_dict['H']) - elif group_key == 'I': - hour = int(found_dict['I']) + elif key == 'Y': + year = int(val) + elif key == 'm': + month = int(val) + elif key == 'B': + month = locale_time.f_month.index(val.lower()) + elif key == 'b': + month = locale_time.a_month.index(val.lower()) + elif key == 'd': + day = int(val) + elif key == 'H': + hour = int(val) + elif key == 'I': + hour = int(val) ampm = found_dict.get('p', '').lower() # If there was no AM/PM indicator, we'll treat this like AM if ampm in ('', locale_time.am_pm[0]): @@ -152,37 +153,38 @@ def reGroupDictStrptime(found_dict): # 12 noon == 12 PM == hour 12 if hour != 12: hour += 12 - elif group_key == 'M': - minute = int(found_dict['M']) - elif group_key == 'S': - second = int(found_dict['S']) - elif group_key == 'f': - s = found_dict['f'] - # Pad to always return microseconds. - s += "0" * (6 - len(s)) - fraction = int(s) - elif group_key == 'A': - weekday = locale_time.f_weekday.index(found_dict['A'].lower()) - elif group_key == 'a': - weekday = locale_time.a_weekday.index(found_dict['a'].lower()) - elif group_key == 'w': - weekday = int(found_dict['w']) + elif key == 'M': + minute = int(val) + elif key == 'S': + second = int(val) + elif key == 'f': + if msec: + s = val + # Pad to always return microseconds. + s += "0" * (6 - len(s)) + fraction = int(s) + elif key == 'A': + weekday = locale_time.f_weekday.index(val.lower()) + elif key == 'a': + weekday = locale_time.a_weekday.index(val.lower()) + elif key == 'w': + weekday = int(val) if weekday == 0: weekday = 6 else: weekday -= 1 - elif group_key == 'j': - julian = int(found_dict['j']) - elif group_key in ('U', 'W'): - week_of_year = int(found_dict[group_key]) - if group_key == 'U': + elif key == 'j': + julian = int(val) + elif key in ('U', 'W'): + week_of_year = int(val) + if key == 'U': # U starts week on Sunday. week_of_year_start = 6 else: # W starts week on Monday. week_of_year_start = 0 - elif group_key == 'z': - z = found_dict['z'] + elif key == 'z': + z = val if z == "Z": tzoffset = 0 else: @@ -227,7 +229,7 @@ def reGroupDictStrptime(found_dict): # Actully create date date_result = datetime.datetime( year, month, day, hour, minute, second, fraction) - if gmtoff: + if gmtoff is not None: date_result = date_result - datetime.timedelta(seconds=gmtoff) if date_result > now and assume_today: @@ -240,7 +242,9 @@ def reGroupDictStrptime(found_dict): year=year-1, month=month, day=day) if gmtoff is not None: - return calendar.timegm(date_result.utctimetuple()) + tm = calendar.timegm(date_result.utctimetuple()) else: - return time.mktime(date_result.timetuple()) - + tm = time.mktime(date_result.timetuple()) + if msec: + tm += fraction/1000000.0 + return tm diff --git a/fail2ban/tests/datedetectortestcase.py b/fail2ban/tests/datedetectortestcase.py index 9616edbb..b862bbdb 100644 --- a/fail2ban/tests/datedetectortestcase.py +++ b/fail2ban/tests/datedetectortestcase.py @@ -202,6 +202,9 @@ class DateDetectorTest(LogCaptureTestCase): dd = DateDetector() dd.addDefaultTemplate() for (debit, line, cnt) in ( + # shortest distance to datetime should win: + ("030324 0:03:59", "some free text 030324 0:03:59 -- 2003-03-07 17:05:01 ...", 1), + # some free text with datetime: ("2003-03-07 17:05:01", "some free text 2003-03-07 17:05:01 test ...", 15), # distance collision detection (date from foreign input should not be found): ("030324 0:04:00", "server mysqld[1000]: 030324 0:04:00 [Warning] Access denied ..." From f56ff5f48b1075e59884e86a1a18332d98311c77 Mon Sep 17 00:00:00 2001 From: sebres Date: Wed, 5 Oct 2016 12:11:06 +0200 Subject: [PATCH 05/19] optimized to better usage of the last time template (parse part of line at the same place as last time, if enclosed in the same boundaries) thereby follow rule "shortest distance to datetime should win", so possible collision causes search though all templates; speedup it a little bit (not possible collision if distance <= 1 or if line-begin anchoring, so break search if such template found) --- fail2ban/server/datedetector.py | 132 +++++++++++++++++-------- fail2ban/server/datetemplate.py | 21 +++- fail2ban/tests/datedetectortestcase.py | 57 +++++++---- 3 files changed, 145 insertions(+), 65 deletions(-) diff --git a/fail2ban/server/datedetector.py b/fail2ban/server/datedetector.py index e2c14d05..ba2b6377 100644 --- a/fail2ban/server/datedetector.py +++ b/fail2ban/server/datedetector.py @@ -26,7 +26,7 @@ import time from threading import Lock -from .datetemplate import DatePatternRegex, DateTai64n, DateEpoch +from .datetemplate import DateTemplate, DatePatternRegex, DateTai64n, DateEpoch from ..helpers import getLogger # Gets the instance of the logger. @@ -142,6 +142,10 @@ class DateDetectorTemplate(object): # the last distance to date-match within the log file: self.distance = 0x7fffffff + @property + def weight(self): + return self.hits * self.template.weight / max(1, self.distance) + def __getattr__(self, name): """ Returns attribute of template (called for parameters not in slots) """ @@ -158,13 +162,14 @@ class DateDetector(object): _defCache = DateDetectorCache() def __init__(self): - self.__lock = Lock() self.__templates = list() self.__known_names = set() # time the template was long unused (currently 300 == 5m): self.__unusedTime = 300 - # last known distance: - self.__lastDistance = 0 + # last known distance (bypass one char collision) and end position: + self.__lastPos = 1, None + self.__lastEndPos = 0x7fffffff, None + self.__lastTemplIdx = 0x7fffffff # first free place: self.__firstUnused = 0 @@ -198,9 +203,8 @@ class DateDetector(object): def addDefaultTemplate(self): """Add Fail2Ban's default set of date templates. """ - with self.__lock: - for template in DateDetector._defCache.templates: - self._appendTemplate(template) + for template in DateDetector._defCache.templates: + self._appendTemplate(template) @property def templates(self): @@ -226,29 +230,59 @@ class DateDetector(object): The regex match returned from the first successfully matched template. """ + #logSys.log(logLevel, "try to match time for line: %.250s", line) match = None - i = 0 - found = None, 0x7fffffff, -1 - with self.__lock: + # first try to use last template with same start/end position: + i = self.__lastTemplIdx + if i < len(self.__templates): + ddtempl = self.__templates[i] + template = ddtempl.template + distance, endpos = self.__lastPos[0], self.__lastEndPos[0] + # check same boundaries left/right, otherwise possible collision/pattern switch: + if (line[distance-1:distance] == self.__lastPos[1] and + line[endpos:endpos+1] == self.__lastEndPos[1] + ): + if logSys.getEffectiveLevel() <= logLevel-1: + logSys.log(logLevel-1, " try to match last template #%02i (from %r to %r): ... %s ...", + i, distance, endpos, line[distance:endpos]) + match = template.matchDate(line, distance, endpos) + if match: + distance = match.start() + endpos = match.end() + # if different position, possible collision/pattern switch: + if distance == self.__lastPos[0] and endpos == self.__lastEndPos[0]: + logSys.log(logLevel, " matched last time template #%02i", i) + else: + logSys.log(logLevel, " ** last pattern collision - pattern change, search ...") + match = None + # search template and better match: + if not match: + self.__lastTemplIdx = 0x7fffffff + logSys.log(logLevel, " search template ...") + found = None, 0x7fffffff, -1 + i = 0 for ddtempl in self.__templates: template = ddtempl.template match = template.matchDate(line) - if match is not None: - distance = max(1, match.start() + 1) + if match: + distance = match.start() + endpos = match.end() if logSys.getEffectiveLevel() <= logLevel: - logSys.log(logLevel, " matched time template #%r (at %r <= %r, %r) %s", - i, distance, ddtempl.distance, self.__lastDistance, template.name) + logSys.log(logLevel, " matched time template #%02i (at %r <= %r, %r) %s", + i, distance, ddtempl.distance, self.__lastPos[0], template.name) + ## if line-begin anchored - stop searching: + if template.flags & DateTemplate.LINE_BEGIN: + break ## [grave] if distance changed, possible date-match was found somewhere ## in body of message, so save this template, and search further: if ( - (distance > ddtempl.distance or distance > self.__lastDistance) and + (distance > ddtempl.distance or distance > self.__lastPos[0]) and len(self.__templates) > 1 ): - if logSys.getEffectiveLevel() <= logLevel: - logSys.log(logLevel, " ** distance collision - pattern change, reserve") + logSys.log(logLevel, " ** distance collision - pattern change, reserve") ## shortest of both: if distance < found[1]: - found = match, distance, i + found = match, distance, endpos, i ## search further: match = None i += 1 @@ -257,25 +291,29 @@ class DateDetector(object): break i += 1 # check other template was found (use this one with shortest distance): - if match is None and found[0]: - match, distance, i = found + if not match and found[0]: + match, distance, endpos, i = found + logSys.log(logLevel, " use best time template #%02i", i) ddtempl = self.__templates[i] template = ddtempl.template - # we've winner, incr hits, set distance, usage, reorder, etc: - if match is not None: - ddtempl.hits += 1 - self.__lastDistance = ddtempl.distance = distance - ddtempl.lastUsed = time.time() - if self.__firstUnused == i: - self.__firstUnused += 1 - # if not first - try to reorder current template (bubble up), they will be not sorted anymore: - if i: - logSys.log(logLevel, " -> reorder template #%r, hits: %r", i, ddtempl.hits) - self._reorderTemplate(i) - # return tuple with match and template reference used for parsing: - return (match, template) + # we've winner, incr hits, set distance, usage, reorder, etc: + if match: + ddtempl.hits += 1 + ddtempl.lastUsed = time.time() + ddtempl.distance = distance + if self.__firstUnused == i: + self.__firstUnused += 1 + self.__lastPos = distance, line[distance-1:distance] + self.__lastEndPos = endpos, line[endpos:endpos+1] + # if not first - try to reorder current template (bubble up), they will be not sorted anymore: + if i: + i = self._reorderTemplate(i) + self.__lastTemplIdx = i + # return tuple with match and template reference used for parsing: + return (match, template) # not found: + logSys.log(logLevel, " no template.") return (None, None) def getTime(self, line, timeMatch=None): @@ -311,7 +349,8 @@ class DateDetector(object): date[0], date[1].group(), template.name) return date except ValueError: - return None + pass + return None def _reorderTemplate(self, num): """Reorder template (bubble up) in template list if hits grows enough. @@ -324,29 +363,36 @@ class DateDetector(object): if num: templates = self.__templates ddtempl = templates[num] + if logSys.getEffectiveLevel() <= logLevel: + logSys.log(logLevel, " -> reorder template #%02i, hits: %r", num, ddtempl.hits) ## current hits and time the template was long unused: untime = ddtempl.lastUsed - self.__unusedTime - weight = ddtempl.hits * ddtempl.template.weight / ddtempl.distance + weight = ddtempl.weight ## try to move faster (first if unused available, or half of part to current template position): pos = self.__firstUnused if self.__firstUnused < num else num // 2 - pweight = templates[pos].hits * templates[pos].template.weight / templates[pos].distance + pweight = templates[pos].weight ## don't move too often (multiline logs resp. log's with different date patterns), ## if template not used too long, replace it also : - logSys.log(logLevel, " -> compare template #%r & #%r, weight %r > %r, hits %r > %r", - num, pos, weight, pweight, ddtempl.hits, templates[pos].hits) + if logSys.getEffectiveLevel() <= logLevel: + logSys.log(logLevel, " -> compare template #%02i & #%02i, weight %.3f > %.3f, hits %r > %r", + num, pos, weight, pweight, ddtempl.hits, templates[pos].hits) if not pweight or weight > pweight or templates[pos].lastUsed < untime: ## if not larger (and target position recently used) - move slow (exact 1 position): if weight <= pweight and templates[pos].lastUsed > untime: pos = num-1 ## if still smaller and template at position used, don't move: - pweight = templates[pos].hits * templates[pos].template.weight / templates[pos].distance - logSys.log(logLevel, " -> compare template #%r & #%r, weight %r > %r, hits %r > %r", - num, pos, weight, pweight, ddtempl.hits, templates[pos].hits) + pweight = templates[pos].weight + if logSys.getEffectiveLevel() <= logLevel: + logSys.log(logLevel, " -> compare template #%02i & #%02i, weight %.3f > %.3f, hits %r > %r", + num, pos, weight, pweight, ddtempl.hits, templates[pos].hits) if weight < pweight and templates[pos].lastUsed > untime: return del templates[num] templates[pos:0] = [ddtempl] ## correct first unused: - if pos == self.__firstUnused: + while self.__firstUnused < len(templates) and templates[self.__firstUnused].hits: self.__firstUnused += 1 - logSys.log(logLevel, " -> moved template #%r -> #%r", num, pos) + if logSys.getEffectiveLevel() <= logLevel: + logSys.log(logLevel, " -> moved template #%02i -> #%02i", num, pos) + return pos + return num diff --git a/fail2ban/server/datetemplate.py b/fail2ban/server/datetemplate.py index 3d116b1e..d85c8f66 100644 --- a/fail2ban/server/datetemplate.py +++ b/fail2ban/server/datetemplate.py @@ -48,9 +48,14 @@ class DateTemplate(object): regex """ + LINE_BEGIN = 8 + WORD_BEGIN = 2 + WORD_END = 1 + def __init__(self): self.name = "" self.weight = 1.0 + self.flags = 0 self._regex = "" self._cRegex = None @@ -83,12 +88,14 @@ class DateTemplate(object): regex = regex.strip() # if word or line start boundary: if wordBegin and not RE_NO_WRD_BOUND_BEG.search(regex): + self.flags |= DateTemplate.WORD_BEGIN if wordBegin != 'start' else DateTemplate.LINE_BEGIN regex = (r'(?=^|\b|\W)' if wordBegin != 'start' else r"(?:^|(?<=^\W)|(?<=^\W{2}))") + regex self.name = ('{*WD-BEG}' if wordBegin != 'start' else '{^LN-BEG}') + self.name # if word end boundary: if wordEnd and not RE_NO_WRD_BOUND_END.search(regex): + self.flags |= DateTemplate.WORD_END regex += r'(?=\b|\W|$)' - self.name += ('{*WD-END}' if wordEnd else '') + self.name += '{*WD-END}' # remove possible special pattern "**" in front and end of regex: regex = RE_DEL_WRD_BOUNDS.sub('', regex) self._regex = regex @@ -97,12 +104,18 @@ class DateTemplate(object): """Regex used to search for date. """) - def matchDate(self, line): - """Check if regex for date matches on a log line. + def _compileRegex(self): + """Compile regex by first usage. """ if not self._cRegex: self._cRegex = re.compile(self.regex, re.UNICODE | re.IGNORECASE) - dateMatch = self._cRegex.search(line) + + def matchDate(self, line, *args): + """Check if regex for date matches on a log line. + """ + if not self._cRegex: + self._compileRegex() + dateMatch = self._cRegex.search(line, *args); # pos, endpos return dateMatch @abstractmethod diff --git a/fail2ban/tests/datedetectortestcase.py b/fail2ban/tests/datedetectortestcase.py index b862bbdb..a4c9b645 100644 --- a/fail2ban/tests/datedetectortestcase.py +++ b/fail2ban/tests/datedetectortestcase.py @@ -42,17 +42,20 @@ class DateDetectorTest(LogCaptureTestCase): def setUp(self): """Call before every test case.""" LogCaptureTestCase.setUp(self) - self.__old_eff_level = datedetector.logLevel - datedetector.logLevel = logSys.getEffectiveLevel() setUpMyTime() - self.__datedetector = DateDetector() - self.__datedetector.addDefaultTemplate() + self.__datedetector = None def tearDown(self): """Call after every test case.""" LogCaptureTestCase.tearDown(self) - datedetector.logLevel = self.__old_eff_level tearDownMyTime() + + @property + def datedetector(self): + if self.__datedetector is None: + self.__datedetector = DateDetector() + self.__datedetector.addDefaultTemplate() + return self.__datedetector def testGetEpochTime(self): # correct epoch time, using all variants: @@ -60,7 +63,7 @@ class DateDetectorTest(LogCaptureTestCase): for date in ("%s", "[%s]", "[%s.555]", "audit(%s.555:101)"): date = date % dateUnix log = date + " [sshd] error: PAM: Authentication failure" - datelog = self.__datedetector.getTime(log) + datelog = self.datedetector.getTime(log) self.assertTrue(datelog, "Parse epoch time for %s failed" % (date,)) ( datelog, matchlog ) = datelog self.assertEqual(int(datelog), dateUnix) @@ -70,7 +73,7 @@ class DateDetectorTest(LogCaptureTestCase): for date in ("%s", "[%s]", "[%s.555]", "audit(%s.555:101)"): date = date % dateUnix log = date + " [sshd] error: PAM: Authentication failure" - datelog = self.__datedetector.getTime(log) + datelog = self.datedetector.getTime(log) self.assertFalse(datelog) def testGetTime(self): @@ -80,7 +83,7 @@ class DateDetectorTest(LogCaptureTestCase): # is not correctly determined atm, since year is not present # in the log entry. Since this doesn't effect the operation # of fail2ban -- we just ignore incorrect day of the week - ( datelog, matchlog ) = self.__datedetector.getTime(log) + ( datelog, matchlog ) = self.datedetector.getTime(log) self.assertEqual(datelog, dateUnix) self.assertEqual(matchlog.group(), 'Jan 23 21:59:59') @@ -140,7 +143,7 @@ class DateDetectorTest(LogCaptureTestCase): if not bound and prefix == "word-boundary": continue logSys.debug(' -- test %-5s for %r', should_match, log) # with getTime: - logtime = self.__datedetector.getTime(log) + logtime = self.datedetector.getTime(log) if should_match: self.assertNotEqual(logtime, None, "getTime retrieved nothing: failure for %s by prefix %r, anchored: %r, log: %s" % ( sdate, prefix, anchored, log)) @@ -152,8 +155,8 @@ class DateDetectorTest(LogCaptureTestCase): self.assertEqual(logtime, None, "getTime should have not matched for %r by prefix %r Got: %s" % (sdate, prefix, logtime)) # with getTime(matchTime) - this combination used in filter: - (timeMatch, template) = matchTime = self.__datedetector.matchTime(log) - logtime = self.__datedetector.getTime(log, matchTime) + (timeMatch, template) = matchTime = self.datedetector.matchTime(log) + logtime = self.datedetector.getTime(log, matchTime) logSys.debug(' -- found - %r', template.name if timeMatch else False) if should_match: self.assertNotEqual(logtime, None, @@ -168,26 +171,26 @@ class DateDetectorTest(LogCaptureTestCase): logSys.debug(' -- OK') def testAllUniqueTemplateNames(self): - self.assertRaises(ValueError, self.__datedetector.appendTemplate, - self.__datedetector.templates[0]) + self.assertRaises(ValueError, self.datedetector.appendTemplate, + self.datedetector.templates[0]) def testFullYearMatch_gh130(self): # see https://github.com/fail2ban/fail2ban/pull/130 # yoh: unfortunately this test is not really effective to reproduce the # situation but left in place to assure consistent behavior mu = time.mktime(datetime.datetime(2012, 10, 11, 2, 37, 17).timetuple()) - logdate = self.__datedetector.getTime('2012/10/11 02:37:17 [error] 18434#0') + logdate = self.datedetector.getTime('2012/10/11 02:37:17 [error] 18434#0') self.assertNotEqual(logdate, None) ( logTime, logMatch ) = logdate self.assertEqual(logTime, mu) self.assertEqual(logMatch.group(), '2012/10/11 02:37:17') # confuse it with year being at the end for i in xrange(10): - ( logTime, logMatch ) = self.__datedetector.getTime('11/10/2012 02:37:17 [error] 18434#0') + ( logTime, logMatch ) = self.datedetector.getTime('11/10/2012 02:37:17 [error] 18434#0') self.assertEqual(logTime, mu) self.assertEqual(logMatch.group(), '11/10/2012 02:37:17') # and now back to the original - ( logTime, logMatch ) = self.__datedetector.getTime('2012/10/11 02:37:17 [error] 18434#0') + ( logTime, logMatch ) = self.datedetector.getTime('2012/10/11 02:37:17 [error] 18434#0') self.assertEqual(logTime, mu) self.assertEqual(logMatch.group(), '2012/10/11 02:37:17') @@ -199,8 +202,7 @@ class DateDetectorTest(LogCaptureTestCase): self.assertEqual(t.matchDate('aaaac').group(), 'aaaac') def testAmbiguousInOrderedTemplates(self): - dd = DateDetector() - dd.addDefaultTemplate() + dd = self.datedetector for (debit, line, cnt) in ( # shortest distance to datetime should win: ("030324 0:03:59", "some free text 030324 0:03:59 -- 2003-03-07 17:05:01 ...", 1), @@ -224,6 +226,25 @@ class DateDetectorTest(LogCaptureTestCase): self.assertTrue(match) self.assertEqual(match.group(), debit) + def testLowLevelLogging(self): + # test coverage for the deep (heavy) debug messages: + try: + self.__old_eff_level = datedetector.logLevel + if datedetector.logLevel < logSys.getEffectiveLevel()+1: + datedetector.logLevel = logSys.getEffectiveLevel()+1 + dd = self.datedetector + i = 0 + for (line, cnt) in ( + ("server mysqld[5906]: 2005-10-07 06:09:%02i 5907 [Warning] Access denied", 2), + ("server mysqld[5906]: 051007 06:10:%02i 5907 [Warning] Access denied", 5), + ("server mysqld[5906]: 2005-10-07 06:09:%02i 5907 [Warning] Access denied", 10), + ): + for i in range(i, i+cnt+1): + logSys.debug('== test: %r', (line % i, cnt)) + match, template = dd.matchTime(line % i) + self.assertTrue(match) + finally: + datedetector.logLevel = self.__old_eff_level iso8601 = DatePatternRegex("%Y-%m-%d[T ]%H:%M:%S(?:\.%f)?%z") From ab0ac2111c4e123b7f685235be99f515938fd193 Mon Sep 17 00:00:00 2001 From: sebres Date: Wed, 5 Oct 2016 19:34:21 +0200 Subject: [PATCH 06/19] added possibility to specify more precise default date pattern: - `datepattern = {^LN-BEG}` - only line-begin anchored default patterns (matches date only at begin of line, or with max distance up to 2 non-alphanumeric characters from line-begin); - `datepattern = {*WD-BEG}` - only word-begin anchored default patterns; - `datepattern = ^prefix{DATE}suffix` - exact specified default patterns (using prefix and suffix); common filter configs gets a more precise, line-begin anchored (datepattern = {^LN-BEG}) resp. custom anchoring default date-patterns; --- config/filter.d/3proxy.conf | 2 + config/filter.d/apache-common.conf | 2 + config/filter.d/apache-pass.conf | 4 -- config/filter.d/assp.conf | 2 + config/filter.d/asterisk.conf | 1 + config/filter.d/common.conf | 3 ++ config/filter.d/counter-strike.conf | 2 - config/filter.d/courier-auth.conf | 2 + config/filter.d/directadmin.conf | 1 - config/filter.d/freeswitch.conf | 2 + config/filter.d/kerio.conf | 2 - config/filter.d/monit.conf | 2 +- config/filter.d/murmur.conf | 3 +- config/filter.d/nginx-http-auth.conf | 2 + config/filter.d/nginx-limit-req.conf | 1 + config/filter.d/openhab.conf | 1 - config/filter.d/oracleims.conf | 4 +- config/filter.d/selinux-common.conf | 2 + config/filter.d/sogo-auth.conf | 4 +- config/filter.d/squirrelmail.conf | 2 - config/filter.d/sshd.conf | 4 +- fail2ban/client/fail2banregex.py | 5 +- fail2ban/client/filterreader.py | 17 ++++--- fail2ban/server/datedetector.py | 48 +++++++++++++------ fail2ban/server/datetemplate.py | 20 ++++++-- fail2ban/server/filter.py | 35 ++++++++++---- fail2ban/server/strptime.py | 1 + fail2ban/tests/files/filter.d/testcase01.conf | 1 - fail2ban/tests/servertestcase.py | 2 +- 29 files changed, 120 insertions(+), 57 deletions(-) diff --git a/config/filter.d/3proxy.conf b/config/filter.d/3proxy.conf index 299c3a29..76c7573c 100644 --- a/config/filter.d/3proxy.conf +++ b/config/filter.d/3proxy.conf @@ -9,6 +9,8 @@ failregex = ^\s[+-]\d{4} \S+ \d{3}0[1-9] \S+ :\d+ [\d.]+:\d+ \d+ \d+ \d+\s ignoreregex = +datepattern = {^LN-BEG} + # DEV Notes: # http://www.3proxy.ru/howtoe.asp#ERRORS indicates that 01-09 are # all authentication problems (%E field) diff --git a/config/filter.d/apache-common.conf b/config/filter.d/apache-common.conf index c843c92e..3d1f902b 100644 --- a/config/filter.d/apache-common.conf +++ b/config/filter.d/apache-common.conf @@ -10,6 +10,8 @@ after = apache-common.local _apache_error_client = \[\] \[(:?error|\S+:\S+)\]( \[pid \d+(:\S+ \d+)?\])? \[client (:\d{1,5})?\] +datepattern = {^LN-BEG} + # Common prefix for [error] apache messages which also would include # Depending on the version it could be # 2.2: [Sat Jun 01 11:23:08 2013] [error] [client 1.2.3.4] diff --git a/config/filter.d/apache-pass.conf b/config/filter.d/apache-pass.conf index dd00f953..7bdff9c0 100644 --- a/config/filter.d/apache-pass.conf +++ b/config/filter.d/apache-pass.conf @@ -3,10 +3,6 @@ # # The knocking request must have a referer. -[INCLUDES] - -before = apache-common.conf - [Definition] failregex = ^ - \w+ \[\] "GET HTTP/1\.[01]" 200 \d+ ".*" "[^-].*"$ diff --git a/config/filter.d/assp.conf b/config/filter.d/assp.conf index 278e25cb..8d6367c9 100644 --- a/config/filter.d/assp.conf +++ b/config/filter.d/assp.conf @@ -20,6 +20,8 @@ failregex = ^(:? \[SSL-out\])? max sender authentication errors \(\d{,3}\ ignoreregex = +datepattern = {^LN-BEG} + # DEV Notes: # V1 Examples matches: # Apr-27-13 02:33:09 Blocking 217.194.197.97 - too much AUTH errors (41); diff --git a/config/filter.d/asterisk.conf b/config/filter.d/asterisk.conf index 79bd1ff2..df55d288 100644 --- a/config/filter.d/asterisk.conf +++ b/config/filter.d/asterisk.conf @@ -31,6 +31,7 @@ failregex = ^%(__prefix_line)s%(log_prefix)s Registration from '[^']*' failed fo ignoreregex = +datepattern = {^LN-BEG} # Author: Xavier Devlamynck / Daniel Black # diff --git a/config/filter.d/common.conf b/config/filter.d/common.conf index 586f428a..a8cba188 100644 --- a/config/filter.d/common.conf +++ b/config/filter.d/common.conf @@ -61,4 +61,7 @@ __prefix_line = %(__date_ambit)s?\s*(?:%(__bsd_syslog_verbose)s\s+)?(?:%(__hostn # pam_ldap __pam_auth = pam_unix +# standardly all formats using prefix have line-begin anchored date: +datepattern = {^LN-BEG} + # Author: Yaroslav Halchenko diff --git a/config/filter.d/counter-strike.conf b/config/filter.d/counter-strike.conf index a896b5ca..294927bc 100644 --- a/config/filter.d/counter-strike.conf +++ b/config/filter.d/counter-strike.conf @@ -8,8 +8,6 @@ failregex = ^: Bad Rcon: "rcon \d+ "\S+" sv_contact ".*?"" from ":\d+"$ ignoreregex = -[Init] - datepattern = ^L %%d/%%m/%%Y - %%H:%%M:%%S diff --git a/config/filter.d/courier-auth.conf b/config/filter.d/courier-auth.conf index 1170a63a..87ee55b4 100644 --- a/config/filter.d/courier-auth.conf +++ b/config/filter.d/courier-auth.conf @@ -15,5 +15,7 @@ failregex = ^%(__prefix_line)sLOGIN FAILED, user=.*, ip=\[\]$ ignoreregex = +datepattern = {^LN-BEG} + # Author: Christoph Haas # Modified by: Cyril Jaquier diff --git a/config/filter.d/directadmin.conf b/config/filter.d/directadmin.conf index 7622e548..87c7802f 100644 --- a/config/filter.d/directadmin.conf +++ b/config/filter.d/directadmin.conf @@ -13,7 +13,6 @@ failregex = ^: \'\' \d{1,3} failed login attempt(s)?. \s* ignoreregex = -[Init] datepattern = ^%%Y:%%m:%%d-%%H:%%M:%%S # diff --git a/config/filter.d/freeswitch.conf b/config/filter.d/freeswitch.conf index 29940240..4759fbed 100644 --- a/config/filter.d/freeswitch.conf +++ b/config/filter.d/freeswitch.conf @@ -26,6 +26,8 @@ failregex = %(_pref_line)s \[WARNING\] sofia_reg\.c:\d+ SIP auth (failure|challe ignoreregex = +datepattern = {^LN-BEG} + # Author: Rupa SChomaker, soapee01, Daniel Black # https://freeswitch.org/confluence/display/FREESWITCH/Fail2Ban # Thanks to Jim on mailing list of samples and guidance diff --git a/config/filter.d/kerio.conf b/config/filter.d/kerio.conf index 313c9b36..e0d94753 100644 --- a/config/filter.d/kerio.conf +++ b/config/filter.d/kerio.conf @@ -9,8 +9,6 @@ failregex = ^ SMTP Spam attack detected from , ignoreregex = -[Init] - datepattern = ^\[%%d/%%b/%%Y %%H:%%M:%%S\] # DEV NOTES: diff --git a/config/filter.d/monit.conf b/config/filter.d/monit.conf index 599bd363..b652a1f4 100644 --- a/config/filter.d/monit.conf +++ b/config/filter.d/monit.conf @@ -13,7 +13,7 @@ before = common.conf _daemon = monit # Regexp for previous (accessing monit httpd) and new (access denied) versions -failregex = ^\[[A-Z]+\s+\]\s*error\s*:\s*Warning:\s+Client '' supplied (?:unknown user '[^']+'|wrong password for user '[^']*') accessing monit httpd$ +failregex = ^\[\s*\]\s*error\s*:\s*Warning:\s+Client '' supplied (?:unknown user '[^']+'|wrong password for user '[^']*') accessing monit httpd$ ^%(__prefix_line)s\w+: access denied -- client : (?:unknown user '[^']+'|wrong password for user '[^']*'|empty password)$ # Ignore login with empty user (first connect, no user specified) diff --git a/config/filter.d/murmur.conf b/config/filter.d/murmur.conf index 3775a9d2..507bbd2f 100644 --- a/config/filter.d/murmur.conf +++ b/config/filter.d/murmur.conf @@ -15,13 +15,14 @@ _daemon = murmurd # variable in your server config file (murmur.ini / mumble-server.ini). _usernameregex = [^>]+ -_prefix = [\n\s]*(\.\d{3})?\s+\d+ => <\d+:%(_usernameregex)s\(-1\)> Rejected connection from :\d+: +_prefix = \s+\d+ => <\d+:%(_usernameregex)s\(-1\)> Rejected connection from :\d+: failregex = ^%(_prefix)s Invalid server password$ ^%(_prefix)s Wrong certificate or password for existing user$ ignoreregex = +datepattern = ^{DATE} # DEV Notes: # diff --git a/config/filter.d/nginx-http-auth.conf b/config/filter.d/nginx-http-auth.conf index a689f66a..e45a7f49 100644 --- a/config/filter.d/nginx-http-auth.conf +++ b/config/filter.d/nginx-http-auth.conf @@ -8,6 +8,8 @@ failregex = ^ \[error\] \d+#\d+: \*\d+ user "\S+":? (password mismatch|was not f ignoreregex = +datepattern = {^LN-BEG} + # DEV NOTES: # Based on samples in https://github.com/fail2ban/fail2ban/pull/43/files # Extensive search of all nginx auth failures not done yet. diff --git a/config/filter.d/nginx-limit-req.conf b/config/filter.d/nginx-limit-req.conf index 13cf75c2..c283849c 100644 --- a/config/filter.d/nginx-limit-req.conf +++ b/config/filter.d/nginx-limit-req.conf @@ -43,3 +43,4 @@ failregex = ^\s*\[error\] \d+#\d+: \*\d+ limiting requests, excess: [\d\.]+ by z ignoreregex = +datepattern = {^LN-BEG} diff --git a/config/filter.d/openhab.conf b/config/filter.d/openhab.conf index 83857c7a..f6b96339 100644 --- a/config/filter.d/openhab.conf +++ b/config/filter.d/openhab.conf @@ -9,7 +9,6 @@ [Definition] failregex = ^\s+-\s+-\s+\[\]\s+"[A-Z]+ .*" 401 \d+\s*$ -[Init] datepattern = %%d/%%b[^/]*/%%Y:%%H:%%M:%%S %%z diff --git a/config/filter.d/oracleims.conf b/config/filter.d/oracleims.conf index e951ff5d..7d75c322 100644 --- a/config/filter.d/oracleims.conf +++ b/config/filter.d/oracleims.conf @@ -52,10 +52,12 @@ before = common.conf # Note that you MUST have LOG_FORMAT=4 for this to work! # -failregex = ^.*tr="[A-Z]+\|[0-9.]+\|\d+\|\|\d+" ap="[^"]*" mi="Bad password" us="[^"]*" di="535 5.7.8 Bad username or password( \(Authentication failed\))?\."/>$ +failregex = tr="[A-Z]+\|[0-9.]+\|\d+\|\|\d+" ap="[^"]*" mi="Bad password" us="[^"]*" di="535 5.7.8 Bad username or password( \(Authentication failed\))?\."/>$ # Option: ignoreregex # Notes.: regex to ignore. If this regex matches, the line is ignored. # Values: TEXT # ignoreregex = + +datepattern = ^' for user '.*' might not have worked( - password policy: \d* grace: -?\d* expire: -?\d* bound: -?\d*)?\s*$ -ignoreregex = +ignoreregex = "^" + +datepattern = {^LN-BEG} # # DEV Notes: diff --git a/config/filter.d/squirrelmail.conf b/config/filter.d/squirrelmail.conf index af0c38e7..31e922e6 100644 --- a/config/filter.d/squirrelmail.conf +++ b/config/filter.d/squirrelmail.conf @@ -5,8 +5,6 @@ failregex = ^ \[LOGIN_ERROR\].*from : Unknown user or password incorrect\. ignoreregex = -[Init] - datepattern = ^%%m/%%d/%%Y %%H:%%M:%%S # DEV NOTES: diff --git a/config/filter.d/sshd.conf b/config/filter.d/sshd.conf index eeb1518e..c1c8eacf 100644 --- a/config/filter.d/sshd.conf +++ b/config/filter.d/sshd.conf @@ -38,13 +38,13 @@ failregex = ^%(__prefix_line)s(?:error: PAM: )?[aA]uthentication (?:failure|erro ignoreregex = -[Init] - # "maxlines" is number of log lines to buffer for multi-line regex searches maxlines = 10 journalmatch = _SYSTEMD_UNIT=sshd.service + _COMM=sshd +datepattern = {^LN-BEG} + # DEV Notes: # # "Failed \S+ for .*? from ..." failregex uses non-greedy catch-all because diff --git a/fail2ban/client/fail2banregex.py b/fail2ban/client/fail2banregex.py index 6680b097..bf49e7a6 100644 --- a/fail2ban/client/fail2banregex.py +++ b/fail2ban/client/fail2banregex.py @@ -474,8 +474,9 @@ class Fail2banRegex(object): if self._verbose or template.hits: out.append("[%d] %s" % (template.hits, template.name)) if self._verbose_date: - out.append(" # weight: %3s, pattern: %s" % ( - template.weight, getattr(template, 'pattern', ''),)) + out.append(" # weight: %.3f (%.3f), pattern: %s" % ( + template.weight, template.template.weight, + getattr(template, 'pattern', ''),)) out.append(" # regex: %s" % (getattr(template, 'regex', ''),)) pprint_list(out, "[# of hits] date format") diff --git a/fail2ban/client/filterreader.py b/fail2ban/client/filterreader.py index 8b30f914..9bd94fd6 100644 --- a/fail2ban/client/filterreader.py +++ b/fail2ban/client/filterreader.py @@ -40,6 +40,9 @@ class FilterReader(DefinitionInitConfigReader): _configOpts = { "ignoreregex": ["string", None], "failregex": ["string", ""], + "maxlines": ["int", None], + "datepattern": ["string", None], + "journalmatch": ["string", None], } def setFile(self, fileName): @@ -74,16 +77,16 @@ class FilterReader(DefinitionInitConfigReader): stream.append(["multi-set", self._jailName, "add" + opt, multi]) elif len(multi): stream.append(["set", self._jailName, "add" + opt, multi[0]]) - if self._initOpts: - if 'maxlines' in self._initOpts: + elif opt == 'maxlines': # We warn when multiline regex is used without maxlines > 1 # therefore keep sure we set this option first. - stream.insert(0, ["set", self._jailName, "maxlines", self._initOpts["maxlines"]]) - if 'datepattern' in self._initOpts: - stream.append(["set", self._jailName, "datepattern", self._initOpts["datepattern"]]) + stream.insert(0, ["set", self._jailName, "maxlines", value]) + elif opt == 'datepattern': + stream.append(["set", self._jailName, "datepattern", value]) # Do not send a command if the match is empty. - if self._initOpts.get("journalmatch", '') != '': - for match in self._initOpts["journalmatch"].split("\n"): + elif opt == 'journalmatch': + for match in value.split("\n"): + if match == '': continue stream.append( ["set", self._jailName, "addjournalmatch"] + shlex.split(match)) diff --git a/fail2ban/server/datedetector.py b/fail2ban/server/datedetector.py index ba2b6377..0aac4337 100644 --- a/fail2ban/server/datedetector.py +++ b/fail2ban/server/datedetector.py @@ -26,7 +26,7 @@ import time from threading import Lock -from .datetemplate import DateTemplate, DatePatternRegex, DateTai64n, DateEpoch +from .datetemplate import re, DateTemplate, DatePatternRegex, DateTai64n, DateEpoch from ..helpers import getLogger # Gets the instance of the logger. @@ -34,6 +34,8 @@ logSys = getLogger(__name__) logLevel = 6 +RE_DATE_PREMATCH = re.compile("\{DATE\}", re.IGNORECASE) + class DateDetectorCache(object): """Implements the caching of the default templates list. @@ -54,15 +56,18 @@ class DateDetectorCache(object): self._addDefaultTemplate() return self.__templates - def _cacheTemplate(self, template): + def _cacheTemplate(self, template, lineBeginOnly=False): """Cache Fail2Ban's default template. """ if isinstance(template, str): # exact given template with word benin-end boundary: - template = DatePatternRegex(template) + if not lineBeginOnly: + template = DatePatternRegex(template) + else: + template = DatePatternRegex(template, wordBegin='start') # additional template, that prefers datetime at start of a line (safety+performance feature): - if hasattr(template, 'regex'): + if not lineBeginOnly and hasattr(template, 'regex'): template2 = copy.copy(template) regex = getattr(template, 'pattern', template.regex) template2.setRegex(regex, wordBegin='start', wordEnd=True) @@ -90,6 +95,7 @@ class DateDetectorCache(object): # prefixed with optional time zone (monit): # PDT Apr 16 21:05:29 self._cacheTemplate("(?:%z )?(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?") + self._cacheTemplate("(?:%Z )?(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?") # asctime with optional day, subsecond and/or year coming after day # http://bugs.debian.org/798923 # Sun Jan 23 2005 21:59:59.011 @@ -114,17 +120,18 @@ class DateDetectorCache(object): # TAI64N self._cacheTemplate(DateTai64n()) # Epoch + self._cacheTemplate(DateEpoch(lineBeginOnly=True), lineBeginOnly=True) self._cacheTemplate(DateEpoch()) # Only time information in the log - self._cacheTemplate("^%H:%M:%S") + self._cacheTemplate("%H:%M:%S", lineBeginOnly=True) # <09/16/08@05:03:30> - self._cacheTemplate("^<%m/%d/%Exy@%H:%M:%S>") + self._cacheTemplate("<%m/%d/%Exy@%H:%M:%S>", lineBeginOnly=True) # MySQL: 130322 11:46:11 self._cacheTemplate("%Exy%Exm%Exd ?%H:%M:%S") # Apache Tomcat self._cacheTemplate("%b %d, %ExY %I:%M:%S %p") # ASSP: Apr-27-13 02:33:06 - self._cacheTemplate("^%b-%d-%Exy %H:%M:%S") + self._cacheTemplate("%b-%d-%Exy %H:%M:%S", lineBeginOnly=True) self.__templates = self.__tmpcache[0] + self.__tmpcache[1] del self.__tmpcache @@ -172,6 +179,8 @@ class DateDetector(object): self.__lastTemplIdx = 0x7fffffff # first free place: self.__firstUnused = 0 + # pre-match pattern: + self.__preMatch = None def _appendTemplate(self, template): name = template.name @@ -200,10 +209,18 @@ class DateDetector(object): template = DatePatternRegex(template) self._appendTemplate(template) - def addDefaultTemplate(self): + def addDefaultTemplate(self, filterTemplate=None, preMatch=None): """Add Fail2Ban's default set of date templates. """ for template in DateDetector._defCache.templates: + # filter if specified: + if filterTemplate is not None and not filterTemplate(template): continue + # if exact pattern available - create copy of template, contains replaced {DATE} with default regex: + if preMatch is not None: + regex = getattr(template, 'pattern', template.regex) + template = copy.copy(template) + template.setRegex(RE_DATE_PREMATCH.sub(regex, preMatch)) + # append date detector template: self._appendTemplate(template) @property @@ -230,7 +247,7 @@ class DateDetector(object): The regex match returned from the first successfully matched template. """ - #logSys.log(logLevel, "try to match time for line: %.250s", line) + #logSys.log(logLevel, "try to match time for line: %.120s", line) match = None # first try to use last template with same start/end position: i = self.__lastTemplIdx @@ -238,13 +255,16 @@ class DateDetector(object): ddtempl = self.__templates[i] template = ddtempl.template distance, endpos = self.__lastPos[0], self.__lastEndPos[0] + if logSys.getEffectiveLevel() <= logLevel-1: + logSys.log(logLevel-1, " try to match last template #%02i (from %r to %r): ...%r==%r %s %r==%r...", + i, distance, endpos, + line[distance-1:distance], self.__lastPos[1], + line[distance:endpos], + line[endpos:endpos+1], self.__lastEndPos[1]) # check same boundaries left/right, otherwise possible collision/pattern switch: if (line[distance-1:distance] == self.__lastPos[1] and line[endpos:endpos+1] == self.__lastEndPos[1] ): - if logSys.getEffectiveLevel() <= logLevel-1: - logSys.log(logLevel-1, " try to match last template #%02i (from %r to %r): ... %s ...", - i, distance, endpos, line[distance:endpos]) match = template.matchDate(line, distance, endpos) if match: distance = match.start() @@ -270,8 +290,8 @@ class DateDetector(object): if logSys.getEffectiveLevel() <= logLevel: logSys.log(logLevel, " matched time template #%02i (at %r <= %r, %r) %s", i, distance, ddtempl.distance, self.__lastPos[0], template.name) - ## if line-begin anchored - stop searching: - if template.flags & DateTemplate.LINE_BEGIN: + ## if line-begin/end anchored - stop searching: + if template.flags & (DateTemplate.LINE_BEGIN|DateTemplate.LINE_END): break ## [grave] if distance changed, possible date-match was found somewhere ## in body of message, so save this template, and search further: diff --git a/fail2ban/server/datetemplate.py b/fail2ban/server/datetemplate.py index d85c8f66..fcd51f99 100644 --- a/fail2ban/server/datetemplate.py +++ b/fail2ban/server/datetemplate.py @@ -33,9 +33,12 @@ from ..helpers import getLogger logSys = getLogger(__name__) RE_NO_WRD_BOUND_BEG = re.compile(r'^(?:\^|\*\*|\(\?:\^)') -RE_NO_WRD_BOUND_END = re.compile(r'(?(?<=^\[))|(?P(?<=\baudit\()))\d{10,11}\b(?:\.\d{3,6})?(?:(?(selinux)(?=:\d+\)))|(?(square)(?=\])))", - wordBegin=False) ;# already line begin resp. word begin anchored + if not lineBeginOnly: + regex = r"(?:^|(?P(?<=^\[))|(?P(?<=\baudit\()))\d{10,11}\b(?:\.\d{3,6})?(?:(?(selinux)(?=:\d+\)))|(?(square)(?=\])))" + self.setRegex(regex, wordBegin=False) ;# already line begin resp. word begin anchored + else: + regex = r"(?P(?<=^\[))\d{10,11}\b(?:\.\d{3,6})?(?(square)(?=\]))" + self.setRegex(regex, wordBegin='start', wordEnd=True) def getDate(self, line, dateMatch=None): """Method to return the date for a log line. diff --git a/fail2ban/server/filter.py b/fail2ban/server/filter.py index 9b1aefd6..cbf59fcb 100644 --- a/fail2ban/server/filter.py +++ b/fail2ban/server/filter.py @@ -35,7 +35,7 @@ from .ipdns import DNSUtils, IPAddr from .ticket import FailTicket from .jailthread import JailThread from .datedetector import DateDetector -from .datetemplate import DatePatternRegex, DateEpoch, DateTai64n +from .datetemplate import DateTemplate, DatePatternRegex, DateEpoch, DateTai64n from .mytime import MyTime from .failregex import FailRegex, Regex, RegexException from .action import CommandAction @@ -257,14 +257,29 @@ class Filter(JailThread): if pattern is None: self.dateDetector = None return - elif pattern.upper() == "EPOCH": - template = DateEpoch() - template.name = "Epoch" - elif pattern.upper() == "TAI64N": - template = DateTai64n() - template.name = "TAI64N" else: - template = DatePatternRegex(pattern) + key = pattern.upper() + if key == "EPOCH": + template = DateEpoch() + template.name = "Epoch" + elif key == "TAI64N": + template = DateTai64n() + template.name = "TAI64N" + elif key in ("{^LN-BEG}", "{*WD-BEG}", "{DEFAULT}"): + self.dateDetector = DateDetector() + flt = \ + lambda template: template.flags & DateTemplate.LINE_BEGIN if key == "{^LN-BEG}" else \ + lambda template: template.flags & DateTemplate.WORD_BEGIN if key == "{*WD-BEG}" else \ + None + self.dateDetector.addDefaultTemplate(flt) + return + elif "{DATE}" in key: + self.dateDetector = DateDetector() + self.dateDetector.addDefaultTemplate( + lambda template: not template.flags & DateTemplate.LINE_BEGIN, pattern) + return + else: + template = DatePatternRegex(pattern) self.dateDetector = DateDetector() self.dateDetector.appendTemplate(template) logSys.info(" date pattern `%r`: `%s`", @@ -280,9 +295,9 @@ class Filter(JailThread): def getDatePattern(self): if self.dateDetector is not None: templates = self.dateDetector.templates - if len(templates) > 1: + if len(templates) > 2: return None, "Default Detectors" - elif len(templates) == 1: + elif len(templates): if hasattr(templates[0], "pattern"): pattern = templates[0].pattern else: diff --git a/fail2ban/server/strptime.py b/fail2ban/server/strptime.py index 85045ed8..da00a57e 100644 --- a/fail2ban/server/strptime.py +++ b/fail2ban/server/strptime.py @@ -40,6 +40,7 @@ def _getYearCentRE(cent=(0,3), distance=3, now=(MyTime.now(), MyTime.alternateNo #todo: implement literal time zone support like CET, PST, PDT, etc (via pytz): #timeRE['z'] = r"%s?(?PZ|[+-]\d{2}(?::?[0-5]\d)?|[A-Z]{3})?" % timeRE['Z'] +timeRE['Z'] = r"(?P[A-Z]{3,5})" timeRE['z'] = r"(?PZ|[+-]\d{2}(?::?[0-5]\d)?)" # Extend build-in TimeRE with some exact patterns diff --git a/fail2ban/tests/files/filter.d/testcase01.conf b/fail2ban/tests/files/filter.d/testcase01.conf index 0dbbbd2a..b3700d0b 100644 --- a/fail2ban/tests/files/filter.d/testcase01.conf +++ b/fail2ban/tests/files/filter.d/testcase01.conf @@ -33,7 +33,6 @@ failregex = ^%(__prefix_line)s(?:error: PAM: )?Authentication failure for .* fro # ignoreregex = ^.+ john from host 192.168.1.1\s*$ -[Init] # "maxlines" is number of log lines to buffer for multi-line regex searches maxlines = 1 diff --git a/fail2ban/tests/servertestcase.py b/fail2ban/tests/servertestcase.py index d4320257..cd7ce5c9 100644 --- a/fail2ban/tests/servertestcase.py +++ b/fail2ban/tests/servertestcase.py @@ -1107,7 +1107,7 @@ class ServerConfigReaderTests(LogCaptureTestCase): # (we don't use it in this test at all): elif unittest.F2B.fast and ( len(cmd) > 3 and cmd[0] in ('set', 'multi-set') and cmd[2] == 'addfailregex' - ): + ): # pragma: no cover cmd[0] = "set" cmd[3] = "DUMMY-REGEX " # command to server, use cmdHandler direct instead of `transm.proceed(cmd)`: From e735f8f56835abeda6135c2088b54753193de7c9 Mon Sep 17 00:00:00 2001 From: sebres Date: Thu, 6 Oct 2016 15:27:49 +0200 Subject: [PATCH 07/19] default non-unicode and case-sensitive matching (by pattern templates automatically add `(?iu)` for "ignore case" and "unicode" if expected) --- fail2ban/server/datedetector.py | 47 ++++++++++++++------------ fail2ban/server/datetemplate.py | 25 ++++++++++---- fail2ban/tests/datedetectortestcase.py | 45 +++++++++++++++++++++--- 3 files changed, 85 insertions(+), 32 deletions(-) diff --git a/fail2ban/server/datedetector.py b/fail2ban/server/datedetector.py index 0aac4337..f256a425 100644 --- a/fail2ban/server/datedetector.py +++ b/fail2ban/server/datedetector.py @@ -254,27 +254,32 @@ class DateDetector(object): if i < len(self.__templates): ddtempl = self.__templates[i] template = ddtempl.template - distance, endpos = self.__lastPos[0], self.__lastEndPos[0] - if logSys.getEffectiveLevel() <= logLevel-1: - logSys.log(logLevel-1, " try to match last template #%02i (from %r to %r): ...%r==%r %s %r==%r...", - i, distance, endpos, - line[distance-1:distance], self.__lastPos[1], - line[distance:endpos], - line[endpos:endpos+1], self.__lastEndPos[1]) - # check same boundaries left/right, otherwise possible collision/pattern switch: - if (line[distance-1:distance] == self.__lastPos[1] and - line[endpos:endpos+1] == self.__lastEndPos[1] - ): - match = template.matchDate(line, distance, endpos) - if match: - distance = match.start() - endpos = match.end() - # if different position, possible collision/pattern switch: - if distance == self.__lastPos[0] and endpos == self.__lastEndPos[0]: - logSys.log(logLevel, " matched last time template #%02i", i) - else: - logSys.log(logLevel, " ** last pattern collision - pattern change, search ...") - match = None + if template.flags & DateTemplate.LINE_BEGIN: + if logSys.getEffectiveLevel() <= logLevel-1: + logSys.log(logLevel-1, " try to match last anchored template #%02i ...", i) + match = template.matchDate(line) + else: + distance, endpos = self.__lastPos[0], self.__lastEndPos[0] + if logSys.getEffectiveLevel() <= logLevel-1: + logSys.log(logLevel-1, " try to match last template #%02i (from %r to %r): ...%r==%r %s %r==%r...", + i, distance, endpos, + line[distance-1:distance], self.__lastPos[1], + line[distance:endpos], + line[endpos:endpos+1], self.__lastEndPos[1]) + # check same boundaries left/right, otherwise possible collision/pattern switch: + if (line[distance-1:distance] == self.__lastPos[1] and + line[endpos:endpos+1] == self.__lastEndPos[1] + ): + match = template.matchDate(line, distance, endpos) + if match: + distance = match.start() + endpos = match.end() + # if different position, possible collision/pattern switch: + if distance == self.__lastPos[0] and endpos == self.__lastEndPos[0]: + logSys.log(logLevel, " matched last time template #%02i", i) + else: + logSys.log(logLevel, " ** last pattern collision - pattern change, search ...") + match = None # search template and better match: if not match: self.__lastTemplIdx = 0x7fffffff diff --git a/fail2ban/server/datetemplate.py b/fail2ban/server/datetemplate.py index fcd51f99..9524e373 100644 --- a/fail2ban/server/datetemplate.py +++ b/fail2ban/server/datetemplate.py @@ -32,13 +32,18 @@ from ..helpers import getLogger logSys = getLogger(__name__) -RE_NO_WRD_BOUND_BEG = re.compile(r'^(?:\^|\*\*|\(\?:\^)') +RE_NO_WRD_BOUND_BEG = re.compile(r'^(?:\(\?\w+\))?(?:\^|\*\*|\(\?:\^)') RE_NO_WRD_BOUND_END = re.compile(r'(? Date: Thu, 6 Oct 2016 15:39:38 +0200 Subject: [PATCH 08/19] speedup SeekToTime test cases using exact date pattern... --- fail2ban/tests/filtertestcase.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fail2ban/tests/filtertestcase.py b/fail2ban/tests/filtertestcase.py index 6c1a637c..ea996072 100644 --- a/fail2ban/tests/filtertestcase.py +++ b/fail2ban/tests/filtertestcase.py @@ -476,6 +476,8 @@ class LogFileFilterPoll(unittest.TestCase): self.assertFalse(self.filter.isModified(LogFileFilterPoll.FILENAME)) def testSeekToTimeSmallFile(self): + # speedup search using exact date pattern: + self.filter.setDatePattern('^%ExY-%Exm-%Exd %ExH:%ExM:%ExS') fname = tempfile.mktemp(prefix='tmp_fail2ban', suffix='.log') time = 1417512352 f = open(fname, 'w') @@ -560,6 +562,8 @@ class LogFileFilterPoll(unittest.TestCase): _killfile(f, fname) def testSeekToTimeLargeFile(self): + # speedup search using exact date pattern: + self.filter.setDatePattern('^%ExY-%Exm-%Exd %ExH:%ExM:%ExS') fname = tempfile.mktemp(prefix='tmp_fail2ban', suffix='.log') time = 1417512352 f = open(fname, 'w') From bd1eb70c52f078fea65d1688915487a5dea1f3c7 Mon Sep 17 00:00:00 2001 From: sebres Date: Thu, 6 Oct 2016 16:44:07 +0200 Subject: [PATCH 09/19] speedup template first time selection through pre-sorted template list by template hits --- fail2ban/server/datedetector.py | 15 +++++++++------ fail2ban/server/datetemplate.py | 3 +++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/fail2ban/server/datedetector.py b/fail2ban/server/datedetector.py index f256a425..7cbc0f5c 100644 --- a/fail2ban/server/datedetector.py +++ b/fail2ban/server/datedetector.py @@ -88,14 +88,9 @@ class DateDetectorCache(object): # simple date: 2005/01/23 21:59:59 # custom for syslog-ng 2006.12.21 06:43:20 self._cacheTemplate("%ExY(?P<_sep>[-/.])%m(?P=_sep)%d[T ]%H:%M:%S(?:[.,]%f)?(?:\s*%z)?") - # 20050123T215959, 20050123 215959 - self._cacheTemplate("%ExY%Exm%Exd[T ]%ExH%ExM%ExS(?:[.,]%f)?(?:\s*%z)?") # asctime with optional day, subsecond and/or year: # Sun Jan 23 21:59:59.011 2005 - # prefixed with optional time zone (monit): - # PDT Apr 16 21:05:29 self._cacheTemplate("(?:%z )?(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?") - self._cacheTemplate("(?:%Z )?(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?") # asctime with optional day, subsecond and/or year coming after day # http://bugs.debian.org/798923 # Sun Jan 23 2005 21:59:59.011 @@ -132,6 +127,12 @@ class DateDetectorCache(object): self._cacheTemplate("%b %d, %ExY %I:%M:%S %p") # ASSP: Apr-27-13 02:33:06 self._cacheTemplate("%b-%d-%Exy %H:%M:%S", lineBeginOnly=True) + # 20050123T215959, 20050123 215959 + self._cacheTemplate("%ExY%Exm%Exd[T ]%ExH%ExM%ExS(?:[.,]%f)?(?:\s*%z)?") + # prefixed with optional named time zone (monit): + # PDT Apr 16 21:05:29 + self._cacheTemplate("(?:%Z )?(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?") + # self.__templates = self.__tmpcache[0] + self.__tmpcache[1] del self.__tmpcache @@ -212,7 +213,9 @@ class DateDetector(object): def addDefaultTemplate(self, filterTemplate=None, preMatch=None): """Add Fail2Ban's default set of date templates. """ - for template in DateDetector._defCache.templates: + for template in sorted(DateDetector._defCache.templates, + lambda a,b: b.hits - a.hits + ): # filter if specified: if filterTemplate is not None and not filterTemplate(template): continue # if exact pattern available - create copy of template, contains replaced {DATE} with default regex: diff --git a/fail2ban/server/datetemplate.py b/fail2ban/server/datetemplate.py index 9524e373..70a4b1b5 100644 --- a/fail2ban/server/datetemplate.py +++ b/fail2ban/server/datetemplate.py @@ -65,6 +65,7 @@ class DateTemplate(object): self.name = "" self.weight = 1.0 self.flags = 0 + self.hits = 0 self._regex = "" self._cRegex = None @@ -133,6 +134,8 @@ class DateTemplate(object): if not self._cRegex: self._compileRegex() dateMatch = self._cRegex.search(line, *args); # pos, endpos + if dateMatch: + self.hits += 1 return dateMatch @abstractmethod From ae7297e16b0188953bcdd18967bd22982b7fbfbb Mon Sep 17 00:00:00 2001 From: sebres Date: Fri, 7 Oct 2016 14:57:45 +0200 Subject: [PATCH 10/19] more precise date template handling (WARNING: this commit creates possible incompatibilities): - datedetector rewritten more strict as earlier; - default templates can be specified exacter using prefix/suffix syntax (via `datepattern`); - more as one date pattern can be specified using option `datepattern` now (new-line separated); - some default options like `datepattern` can be specified directly in section `[Definition]`, that avoids contrary usage of unnecessarily `[Init]` section, because of performance (each extra section costs time); - option `datepattern` can be specified in jail also (jails without filters); - if first group specified, only this will be cut out from search log-line (e. g.: `^date:[({DATE})]` will cut out only datetime match pattern, and leaves `date:[] failure ip...` for searching in filter); - faster match and fewer searching of appropriate templates (DateDetector.matchTime calls rarer DateTemplate.matchDate now); - standard filters extended with exact prefixed or anchored date templates; template cache introduced (in opposition to default template cache, holds custom templates cached by pattern for possible common usage of same template/regex); --- config/filter.d/apache-badbots.conf | 3 + config/filter.d/apache-fakegooglebot.conf | 2 + config/filter.d/apache-pass.conf | 3 + config/filter.d/assp.conf | 3 +- config/filter.d/dovecot.conf | 5 +- config/filter.d/ejabberd-auth.conf | 4 +- config/filter.d/guacamole.conf | 5 +- config/filter.d/nginx-botsearch.conf | 3 + config/filter.d/nsd.conf | 3 + config/filter.d/php-url-fopen.conf | 3 + config/filter.d/portsentry.conf | 3 + config/filter.d/sogo-auth.conf | 5 +- config/filter.d/squid.conf | 3 + config/filter.d/tine20.conf | 3 + fail2ban/client/jailreader.py | 17 +--- fail2ban/server/datedetector.py | 99 ++++++++++++++----- fail2ban/server/datetemplate.py | 59 +++++++---- fail2ban/server/filter.py | 46 +++------ fail2ban/server/strptime.py | 3 - fail2ban/server/transmitter.py | 2 +- fail2ban/tests/action_d/test_badips.py | 1 + fail2ban/tests/action_d/test_smtp.py | 1 + fail2ban/tests/banmanagertestcase.py | 2 + fail2ban/tests/clientbeautifiertestcase.py | 1 + fail2ban/tests/clientreadertestcase.py | 1 + .../config/filter.d/zzz-generic-example.conf | 5 + fail2ban/tests/datedetectortestcase.py | 59 +++++------ fail2ban/tests/fail2banclienttestcase.py | 1 + fail2ban/tests/fail2banregextestcase.py | 9 ++ fail2ban/tests/failmanagertestcase.py | 1 + fail2ban/tests/files/logs/zzz-generic-example | 8 +- fail2ban/tests/filtertestcase.py | 24 +++++ fail2ban/tests/misctestcase.py | 1 + fail2ban/tests/samplestestcase.py | 1 + fail2ban/tests/servertestcase.py | 8 +- fail2ban/tests/sockettestcase.py | 1 + fail2ban/tests/utils.py | 18 +++- 37 files changed, 280 insertions(+), 136 deletions(-) diff --git a/config/filter.d/apache-badbots.conf b/config/filter.d/apache-badbots.conf index 48b30666..f42aa159 100644 --- a/config/filter.d/apache-badbots.conf +++ b/config/filter.d/apache-badbots.conf @@ -14,6 +14,9 @@ failregex = ^ -.*"(GET|POST|HEAD).*HTTP.*"(?:%(badbots)s|%(badbotscustom)s ignoreregex = +datepattern = ^[^\[]*\[({DATE}) + {^LN-BEG} + # DEV Notes: # List of bad bots fetched from http://www.user-agents.org # Generated on Thu Nov 7 14:23:35 PST 2013 by files/gen_badbots. diff --git a/config/filter.d/apache-fakegooglebot.conf b/config/filter.d/apache-fakegooglebot.conf index b8a73504..729410ad 100644 --- a/config/filter.d/apache-fakegooglebot.conf +++ b/config/filter.d/apache-fakegooglebot.conf @@ -6,6 +6,8 @@ failregex = ^ .*Googlebot.*$ ignoreregex = +datepattern = ^[^\[]*\[({DATE}) + {^LN-BEG} # DEV Notes: # diff --git a/config/filter.d/apache-pass.conf b/config/filter.d/apache-pass.conf index 7bdff9c0..3cab87b0 100644 --- a/config/filter.d/apache-pass.conf +++ b/config/filter.d/apache-pass.conf @@ -9,6 +9,9 @@ failregex = ^ - \w+ \[\] "GET HTTP/1\.[01]" 200 \d+ ".*" "[ ignoreregex = +datepattern = ^[^\[]*\[({DATE}) + {^LN-BEG} + [Init] knocking_url = /knocking/ diff --git a/config/filter.d/assp.conf b/config/filter.d/assp.conf index 8d6367c9..7cca81d2 100644 --- a/config/filter.d/assp.conf +++ b/config/filter.d/assp.conf @@ -20,7 +20,8 @@ failregex = ^(:? \[SSL-out\])? max sender authentication errors \(\d{,3}\ ignoreregex = -datepattern = {^LN-BEG} +datepattern = {^LN-BEG}%%b-%%d-%%Exy %%H:%%M:%%S + {^LN-BEG} # DEV Notes: # V1 Examples matches: diff --git a/config/filter.d/dovecot.conf b/config/filter.d/dovecot.conf index 136a3947..6f8510fc 100644 --- a/config/filter.d/dovecot.conf +++ b/config/filter.d/dovecot.conf @@ -17,10 +17,11 @@ failregex = ^%(__prefix_line)s(%(__pam_auth)s(\(dovecot:auth\))?:)?\s+authentica ignoreregex = -[Init] - journalmatch = _SYSTEMD_UNIT=dovecot.service +datepattern = {^LN-BEG}TAI64N + {^LN-BEG} + # DEV Notes: # * the first regex is essentially a copy of pam-generic.conf # * Probably doesn't do dovecot sql/ldap backends properly (resolved in edit 21/03/2016) diff --git a/config/filter.d/ejabberd-auth.conf b/config/filter.d/ejabberd-auth.conf index 512fdb7c..edd87463 100644 --- a/config/filter.d/ejabberd-auth.conf +++ b/config/filter.d/ejabberd-auth.conf @@ -25,8 +25,6 @@ failregex = ^=INFO REPORT==== ===\nI\(<0\.\d+\.0>:ejabberd_c2s:\d+\) : \([^)]+\ # ignoreregex = -[Init] - # "maxlines" is number of log lines to buffer for multi-line regex searches maxlines = 2 @@ -35,3 +33,5 @@ maxlines = 2 # Values: TEXT # journalmatch = + +datepattern = ^(?:=[^=]+={3,} )?({DATE}) diff --git a/config/filter.d/guacamole.conf b/config/filter.d/guacamole.conf index 49cecc5a..09b4e7b0 100644 --- a/config/filter.d/guacamole.conf +++ b/config/filter.d/guacamole.conf @@ -17,6 +17,9 @@ failregex = ^.*\nWARNING: Authentication attempt from for user "[^"]*" fa # ignoreregex = -[Init] # "maxlines" is number of log lines to buffer for multi-line regex searches maxlines = 2 + +datepattern = ^%%b %%d, %%ExY %%I:%%M:%%S %%p + ^WARNING:()** + {^LN-BEG} \ No newline at end of file diff --git a/config/filter.d/nginx-botsearch.conf b/config/filter.d/nginx-botsearch.conf index 6853e1e8..0be895b2 100644 --- a/config/filter.d/nginx-botsearch.conf +++ b/config/filter.d/nginx-botsearch.conf @@ -13,6 +13,9 @@ failregex = ^ \- \S+ \[\] \"(GET|POST|HEAD) \/ \S+\" 404 .+$ ignoreregex = +datepattern = {^LN-BEG}%%ExY(?P<_sep>[-/.])%%m(?P=_sep)%%d[T ]%%H:%%M:%%S(?:[.,]%%f)?(?:\s*%%z)? + ^[^\[]*\[({DATE}) + {^LN-BEG} # DEV Notes: # Based on apache-botsearch filter diff --git a/config/filter.d/nsd.conf b/config/filter.d/nsd.conf index 8f32f7be..bfd99544 100644 --- a/config/filter.d/nsd.conf +++ b/config/filter.d/nsd.conf @@ -26,3 +26,6 @@ failregex = ^%(__prefix_line)sinfo: ratelimit block .* query TYPE255$ ^%(__prefix_line)sinfo: .* refused, no acl matches\.$ ignoreregex = + +datepattern = {^LN-BEG}Epoch + {^LN-BEG} \ No newline at end of file diff --git a/config/filter.d/php-url-fopen.conf b/config/filter.d/php-url-fopen.conf index 87bd04c8..a7957c9d 100644 --- a/config/filter.d/php-url-fopen.conf +++ b/config/filter.d/php-url-fopen.conf @@ -18,3 +18,6 @@ ignoreregex = # http://blogs.buanzo.com.ar/2009/04/fail2ban-filter-for-php-injection-attacks.html#comment-1489 # # Author: Arturo 'Buanzo' Busleiman + +datepattern = ^[^\[]*\[({DATE}) + {^LN-BEG} diff --git a/config/filter.d/portsentry.conf b/config/filter.d/portsentry.conf index 27dca9b4..35ca2a3d 100644 --- a/config/filter.d/portsentry.conf +++ b/config/filter.d/portsentry.conf @@ -8,5 +8,8 @@ failregex = \/ Port\: [0-9]+ (TCP|UDP) Blocked$ ignoreregex = +datepattern = {^LN-BEG}Epoch + {^LN-BEG} + # Author: Pacop diff --git a/config/filter.d/sogo-auth.conf b/config/filter.d/sogo-auth.conf index 1a3d5292..48221dc0 100644 --- a/config/filter.d/sogo-auth.conf +++ b/config/filter.d/sogo-auth.conf @@ -8,7 +8,10 @@ failregex = ^ sogod \[\d+\]: SOGoRootPage Login from '' for user '.*' migh ignoreregex = "^" -datepattern = {^LN-BEG} +datepattern = {^LN-BEG}%%ExY(?P<_sep>[-/.])%%m(?P=_sep)%%d[T ]%%H:%%M:%%S(?:[.,]%%f)?(?:\s*%%z)? + {^LN-BEG}(?:%%a )?%%b %%d %%H:%%M:%%S(?:\.%%f)?(?: %%ExY)? + ^[^\[]*\[({DATE}) + {^LN-BEG} # # DEV Notes: diff --git a/config/filter.d/squid.conf b/config/filter.d/squid.conf index e26cab9c..58694c48 100644 --- a/config/filter.d/squid.conf +++ b/config/filter.d/squid.conf @@ -9,5 +9,8 @@ failregex = ^\s+\d\s\s+[A-Z_]+_DENIED/403 .*$ ignoreregex = +datepattern = {^LN-BEG}Epoch + {^LN-BEG} + # Author: Daniel Black diff --git a/config/filter.d/tine20.conf b/config/filter.d/tine20.conf index 0fa6eccd..a80d89e8 100644 --- a/config/filter.d/tine20.conf +++ b/config/filter.d/tine20.conf @@ -10,6 +10,9 @@ failregex = ^[\da-f]{5,} [\da-f]{5,} (-- none --|.*?)( \d+(\.\d+)?(h|m|s|ms)){0 ignoreregex = +datepattern = ^[^-]+ -- [^-]+ -- - ({DATE}) + {^LN-BEG} + # Author: Mika (mkl) from Tine20.org forum: https://www.tine20.org/forum/viewtopic.php?f=2&t=15688&p=54766 # Editor: Daniel Black # Advisor: Lars Kneschke diff --git a/fail2ban/client/jailreader.py b/fail2ban/client/jailreader.py index 9d01a693..3849c027 100644 --- a/fail2ban/client/jailreader.py +++ b/fail2ban/client/jailreader.py @@ -112,6 +112,7 @@ class JailReader(ConfigReader): ["string", "ignorecommand", None], ["string", "ignoreip", None], ["string", "filter", ""], + ["string", "datepattern", None], ["string", "action", ""]] # Before interpolation (substitution) add static options always available as default: @@ -195,6 +196,8 @@ class JailReader(ConfigReader): """ stream = [] + if self.__filter: + stream.extend(self.__filter.convert()) for opt, value in self.__opts.iteritems(): if opt == "logpath" and \ not self.__opts.get('backend', None).startswith("systemd"): @@ -216,17 +219,9 @@ class JailReader(ConfigReader): stream.append(["set", self.__name, "logencoding", value]) elif opt == "backend": backend = value - elif opt == "maxretry": - stream.append(["set", self.__name, "maxretry", value]) elif opt == "ignoreip": for ip in splitwords(value): stream.append(["set", self.__name, "addignoreip", ip]) - elif opt == "findtime": - stream.append(["set", self.__name, "findtime", value]) - elif opt == "bantime": - stream.append(["set", self.__name, "bantime", value]) - elif opt == "usedns": - stream.append(["set", self.__name, "usedns", value]) elif opt in ("failregex", "ignoreregex"): multi = [] for regex in value.split('\n'): @@ -237,10 +232,8 @@ class JailReader(ConfigReader): stream.append(["multi-set", self.__name, "add" + opt, multi]) elif len(multi): stream.append(["set", self.__name, "add" + opt, multi[0]]) - elif opt == "ignorecommand": - stream.append(["set", self.__name, "ignorecommand", value]) - if self.__filter: - stream.extend(self.__filter.convert()) + elif opt not in ('action', 'filter', 'enabled'): + stream.append(["set", self.__name, opt, value]) for action in self.__actions: if isinstance(action, (ConfigReaderUnshared, ConfigReader)): stream.extend(action.convert()) diff --git a/fail2ban/server/datedetector.py b/fail2ban/server/datedetector.py index 7cbc0f5c..cced6d91 100644 --- a/fail2ban/server/datedetector.py +++ b/fail2ban/server/datedetector.py @@ -27,6 +27,7 @@ import time from threading import Lock from .datetemplate import re, DateTemplate, DatePatternRegex, DateTai64n, DateEpoch +from .utils import Utils from ..helpers import getLogger # Gets the instance of the logger. @@ -90,7 +91,7 @@ class DateDetectorCache(object): self._cacheTemplate("%ExY(?P<_sep>[-/.])%m(?P=_sep)%d[T ]%H:%M:%S(?:[.,]%f)?(?:\s*%z)?") # asctime with optional day, subsecond and/or year: # Sun Jan 23 21:59:59.011 2005 - self._cacheTemplate("(?:%z )?(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?") + self._cacheTemplate("(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?") # asctime with optional day, subsecond and/or year coming after day # http://bugs.debian.org/798923 # Sun Jan 23 2005 21:59:59.011 @@ -112,8 +113,6 @@ class DateDetectorCache(object): # subseconds explicit to avoid possible %m<->%d confusion # with previous ("%d-%m-%ExY %H:%M:%S" by "%d(?P<_sep>[-/])%m(?P=_sep)(?:%ExY|%Exy) %H:%M:%S") self._cacheTemplate("%m-%d-%ExY %H:%M:%S(?:\.%f)?") - # TAI64N - self._cacheTemplate(DateTai64n()) # Epoch self._cacheTemplate(DateEpoch(lineBeginOnly=True), lineBeginOnly=True) self._cacheTemplate(DateEpoch()) @@ -132,6 +131,10 @@ class DateDetectorCache(object): # prefixed with optional named time zone (monit): # PDT Apr 16 21:05:29 self._cacheTemplate("(?:%Z )?(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?") + # +00:00 Jan 23 21:59:59.011 2005 + self._cacheTemplate("(?:%z )?(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?") + # TAI64N + self._cacheTemplate(DateTai64n()) # self.__templates = self.__tmpcache[0] + self.__tmpcache[1] del self.__tmpcache @@ -168,6 +171,7 @@ class DateDetector(object): templates """ _defCache = DateDetectorCache() + _patternCache = Utils.Cache(maxCount=1000, maxTime=60*60) def __init__(self): self.__templates = list() @@ -183,9 +187,10 @@ class DateDetector(object): # pre-match pattern: self.__preMatch = None - def _appendTemplate(self, template): + def _appendTemplate(self, template, ignoreDup=False): name = template.name if name in self.__known_names: + if ignoreDup: return raise ValueError( "There is already a template with name %s" % name) self.__known_names.add(name) @@ -207,24 +212,56 @@ class DateDetector(object): If a template already exists with the same name. """ if isinstance(template, str): - template = DatePatternRegex(template) + key = pattern = template + if '%' not in pattern: + key = pattern.upper() + template = DateDetector._patternCache.get(key) + + if not template: + if key in ("EPOCH", "{^LN-BEG}EPOCH", "^EPOCH"): + template = DateEpoch(lineBeginOnly=(key != "EPOCH")) + elif key in ("TAI64N", "{^LN-BEG}TAI64N", "^TAI64N"): + template = DateTai64n(wordBegin=('start' if key != "TAI64N" else False)) + elif key in ("{^LN-BEG}", "{*WD-BEG}", "{DEFAULT}"): + flt = \ + lambda template: template.flags & DateTemplate.LINE_BEGIN if key == "{^LN-BEG}" else \ + lambda template: template.flags & DateTemplate.WORD_BEGIN if key == "{*WD-BEG}" else \ + None + self.addDefaultTemplate(flt) + return + elif "{DATE}" in key: + self.addDefaultTemplate( + lambda template: not template.flags & DateTemplate.LINE_BEGIN, pattern) + return + else: + template = DatePatternRegex(pattern) + + DateDetector._patternCache.set(key, template) + self._appendTemplate(template) + logSys.info(" date pattern `%r`: `%s`", + getattr(template, 'pattern', ''), template.name) + logSys.debug(" date pattern regex for %r: %s", + getattr(template, 'pattern', ''), template.regex) def addDefaultTemplate(self, filterTemplate=None, preMatch=None): """Add Fail2Ban's default set of date templates. """ - for template in sorted(DateDetector._defCache.templates, - lambda a,b: b.hits - a.hits - ): + ignoreDup = len(self.__templates) > 0 + for template in DateDetector._defCache.templates: # filter if specified: if filterTemplate is not None and not filterTemplate(template): continue # if exact pattern available - create copy of template, contains replaced {DATE} with default regex: if preMatch is not None: - regex = getattr(template, 'pattern', template.regex) - template = copy.copy(template) - template.setRegex(RE_DATE_PREMATCH.sub(regex, preMatch)) - # append date detector template: - self._appendTemplate(template) + deftemplate = template + template = DateDetector._patternCache.get((preMatch, deftemplate.name)) + if not template: + regex = getattr(deftemplate, 'pattern', deftemplate.regex) + template = copy.copy(deftemplate) + template.setRegex(RE_DATE_PREMATCH.sub(regex, preMatch)) + DateDetector._patternCache.set((preMatch, deftemplate.name), template) + # append date detector template (ignore duplicate if some was added before default): + self._appendTemplate(template, ignoreDup=ignoreDup) @property def templates(self): @@ -250,17 +287,22 @@ class DateDetector(object): The regex match returned from the first successfully matched template. """ - #logSys.log(logLevel, "try to match time for line: %.120s", line) + # if no templates specified - default templates should be used: + if not len(self.__templates): + self.addDefaultTemplate() + logSys.log(logLevel-1, "try to match time for line: %.120s", line) match = None # first try to use last template with same start/end position: + ignoreBySearch = 0x7fffffff i = self.__lastTemplIdx if i < len(self.__templates): ddtempl = self.__templates[i] template = ddtempl.template - if template.flags & DateTemplate.LINE_BEGIN: + if template.flags & (DateTemplate.LINE_BEGIN|DateTemplate.LINE_END): if logSys.getEffectiveLevel() <= logLevel-1: logSys.log(logLevel-1, " try to match last anchored template #%02i ...", i) match = template.matchDate(line) + ignoreBySearch = i else: distance, endpos = self.__lastPos[0], self.__lastEndPos[0] if logSys.getEffectiveLevel() <= logLevel-1: @@ -278,18 +320,28 @@ class DateDetector(object): distance = match.start() endpos = match.end() # if different position, possible collision/pattern switch: - if distance == self.__lastPos[0] and endpos == self.__lastEndPos[0]: + if ( + template.flags & (DateTemplate.LINE_BEGIN|DateTemplate.LINE_END) or + (distance == self.__lastPos[0] and endpos == self.__lastEndPos[0]) + ): logSys.log(logLevel, " matched last time template #%02i", i) else: logSys.log(logLevel, " ** last pattern collision - pattern change, search ...") match = None + else: + logSys.log(logLevel, " ** last pattern not found - pattern change, search ...") # search template and better match: if not match: self.__lastTemplIdx = 0x7fffffff - logSys.log(logLevel, " search template ...") + logSys.log(logLevel, " search template (%i) ...", len(self.__templates)) found = None, 0x7fffffff, -1 i = 0 for ddtempl in self.__templates: + if logSys.getEffectiveLevel() <= logLevel-1: + logSys.log(logLevel-1, " try template #%02i: %s", i, ddtempl.name) + if i == ignoreBySearch: + i += 1 + continue template = ddtempl.template match = template.matchDate(line) if match: @@ -298,15 +350,18 @@ class DateDetector(object): if logSys.getEffectiveLevel() <= logLevel: logSys.log(logLevel, " matched time template #%02i (at %r <= %r, %r) %s", i, distance, ddtempl.distance, self.__lastPos[0], template.name) + ## last (or single) template - fast stop: + if i+1 >= len(self.__templates): + break ## if line-begin/end anchored - stop searching: if template.flags & (DateTemplate.LINE_BEGIN|DateTemplate.LINE_END): break + ## stop searching if next template still unused, but we had already hits: + if (distance == 0 and ddtempl.hits) and not self.__templates[i+1].template.hits: + break ## [grave] if distance changed, possible date-match was found somewhere ## in body of message, so save this template, and search further: - if ( - (distance > ddtempl.distance or distance > self.__lastPos[0]) and - len(self.__templates) > 1 - ): + if distance > ddtempl.distance or distance > self.__lastPos[0]: logSys.log(logLevel, " ** distance collision - pattern change, reserve") ## shortest of both: if distance < found[1]: @@ -374,7 +429,7 @@ class DateDetector(object): if date is not None: if logSys.getEffectiveLevel() <= logLevel: logSys.log(logLevel, " got time %f for %r using template %s", - date[0], date[1].group(), template.name) + date[0], date[1].group(1), template.name) return date except ValueError: pass diff --git a/fail2ban/server/datetemplate.py b/fail2ban/server/datetemplate.py index 70a4b1b5..3a9612aa 100644 --- a/fail2ban/server/datetemplate.py +++ b/fail2ban/server/datetemplate.py @@ -24,7 +24,7 @@ __author__ = "Cyril Jaquier" __copyright__ = "Copyright (c) 2004 Cyril Jaquier" __license__ = "GPL" -import re +import re, time from abc import abstractmethod from .strptime import reGroupDictStrptime, timeRE, getTimePatternRE @@ -32,9 +32,14 @@ from ..helpers import getLogger logSys = getLogger(__name__) -RE_NO_WRD_BOUND_BEG = re.compile(r'^(?:\(\?\w+\))?(?:\^|\*\*|\(\?:\^)') -RE_NO_WRD_BOUND_END = re.compile(r'(?(?<=^\[))|(?P(?<=\baudit\()))\d{10,11}\b(?:\.\d{3,6})?(?:(?(selinux)(?=:\d+\)))|(?(square)(?=\])))" + regex = r"((?:^|(?P(?<=^\[))|(?P(?<=\baudit\()))\d{10,11}\b(?:\.\d{3,6})?)(?:(?(selinux)(?=:\d+\)))|(?(square)(?=\])))" self.setRegex(regex, wordBegin=False) ;# already line begin resp. word begin anchored else: - regex = r"(?P(?<=^\[))\d{10,11}\b(?:\.\d{3,6})?(?(square)(?=\]))" + regex = r"((?P(?<=^\[))?\d{10,11}\b(?:\.\d{3,6})?)(?(square)(?=\]))" self.setRegex(regex, wordBegin='start', wordEnd=True) def getDate(self, line, dateMatch=None): @@ -199,7 +219,7 @@ class DateEpoch(DateTemplate): dateMatch = self.matchDate(line) if dateMatch: # extract part of format which represents seconds since epoch - return (float(dateMatch.group()), dateMatch) + return (float(dateMatch.group(1)), dateMatch) return None @@ -244,7 +264,13 @@ class DatePatternRegex(DateTemplate): self.setRegex(pattern) def setRegex(self, pattern, wordBegin=True, wordEnd=True): + # original pattern: self._pattern = pattern + # if explicit given {^LN-BEG} - remove it from pattern and set 'start' in wordBegin: + if wordBegin and RE_EXLINE_BOUND_BEG.search(pattern): + pattern = RE_EXLINE_BOUND_BEG.sub('', pattern) + wordBegin = 'start' + # wrap to regex: fmt = self._patternRE.sub(r'%(\1)s', pattern) self.name = fmt % self._patternName regex = fmt % timeRE @@ -285,12 +311,11 @@ class DateTai64n(DateTemplate): regex """ - def __init__(self): + def __init__(self, wordBegin=False): DateTemplate.__init__(self) self.name = "TAI64N" # We already know the format for TAI64N - # yoh: we should not add an additional front anchor - self.setRegex("@[0-9a-f]{24}", wordBegin=False) + self.setRegex("@[0-9a-f]{24}", wordBegin=wordBegin) def getDate(self, line, dateMatch=None): """Method to return the date for a log line. @@ -310,7 +335,7 @@ class DateTai64n(DateTemplate): dateMatch = self.matchDate(line) if dateMatch: # extract part of format which represents seconds since epoch - value = dateMatch.group() + value = dateMatch.group(1) seconds_since_epoch = value[2:17] # convert seconds from HEX into local time stamp return (int(seconds_since_epoch, 16), dateMatch) diff --git a/fail2ban/server/filter.py b/fail2ban/server/filter.py index cbf59fcb..7f7584cf 100644 --- a/fail2ban/server/filter.py +++ b/fail2ban/server/filter.py @@ -35,7 +35,6 @@ from .ipdns import DNSUtils, IPAddr from .ticket import FailTicket from .jailthread import JailThread from .datedetector import DateDetector -from .datetemplate import DateTemplate, DatePatternRegex, DateEpoch, DateTai64n from .mytime import MyTime from .failregex import FailRegex, Regex, RegexException from .action import CommandAction @@ -94,7 +93,6 @@ class Filter(JailThread): self.ticks = 0 self.dateDetector = DateDetector() - self.dateDetector.addDefaultTemplate() logSys.debug("Created %s" % self) def __repr__(self): @@ -258,34 +256,12 @@ class Filter(JailThread): self.dateDetector = None return else: - key = pattern.upper() - if key == "EPOCH": - template = DateEpoch() - template.name = "Epoch" - elif key == "TAI64N": - template = DateTai64n() - template.name = "TAI64N" - elif key in ("{^LN-BEG}", "{*WD-BEG}", "{DEFAULT}"): - self.dateDetector = DateDetector() - flt = \ - lambda template: template.flags & DateTemplate.LINE_BEGIN if key == "{^LN-BEG}" else \ - lambda template: template.flags & DateTemplate.WORD_BEGIN if key == "{*WD-BEG}" else \ - None - self.dateDetector.addDefaultTemplate(flt) - return - elif "{DATE}" in key: - self.dateDetector = DateDetector() - self.dateDetector.addDefaultTemplate( - lambda template: not template.flags & DateTemplate.LINE_BEGIN, pattern) - return - else: - template = DatePatternRegex(pattern) - self.dateDetector = DateDetector() - self.dateDetector.appendTemplate(template) - logSys.info(" date pattern `%r`: `%s`", - pattern, template.name) - logSys.debug(" date pattern regex for %r: %s", - pattern, template.regex) + dd = DateDetector() + if not isinstance(pattern, (list, tuple)): + pattern = filter(bool, map(str.strip, re.split('\n+', pattern))) + for pattern in pattern: + dd.appendTemplate(pattern) + self.dateDetector = dd ## # Get the date detector pattern, or Default Detectors if not changed @@ -295,7 +271,8 @@ class Filter(JailThread): def getDatePattern(self): if self.dateDetector is not None: templates = self.dateDetector.templates - if len(templates) > 2: + # lazy template init, by first match + if not len(templates) or len(templates) > 2: return None, "Default Detectors" elif len(templates): if hasattr(templates[0], "pattern"): @@ -303,6 +280,7 @@ class Filter(JailThread): else: pattern = None return pattern, templates[0].name + return None ## # Set the maximum retry value. @@ -483,9 +461,9 @@ class Filter(JailThread): (timeMatch, template) = self.dateDetector.matchTime(l) if timeMatch: tupleLine = ( - l[:timeMatch.start()], - l[timeMatch.start():timeMatch.end()], - l[timeMatch.end():], + l[:timeMatch.start(1)], + l[timeMatch.start(1):timeMatch.end(1)], + l[timeMatch.end(1):], (timeMatch, template) ) else: diff --git a/fail2ban/server/strptime.py b/fail2ban/server/strptime.py index da00a57e..643984d3 100644 --- a/fail2ban/server/strptime.py +++ b/fail2ban/server/strptime.py @@ -55,8 +55,6 @@ timeRE['ExS'] = r"(?P6[0-1]|[0-5]\d)" # respect possible run in the test-cases (alternate date used there): timeRE['ExY'] = r"(?P%s\d)" % _getYearCentRE(cent=(0,3), distance=3) timeRE['Exy'] = r"(?P%s\d)" % _getYearCentRE(cent=(2,3), distance=3) -# Special pattern "start of the line", analogous to `wordBegin='start'` of default templates: -timeRE['ExLB'] = r"(?:^|(?<=^\W)|(?<=^\W{2}))" def getTimePatternRE(): keys = timeRE.keys() @@ -70,7 +68,6 @@ def getTimePatternRE(): 'M': "Minute", 'p': "AMPM", 'S': "Second", 'U': "Yearweek", 'w': "Weekday", 'W': "Yearweek", 'y': 'Year2', 'Y': "Year", '%': "%", 'z': "Zone offset", 'f': "Microseconds", 'Z': "Zone name", - 'ExLB': '{^LN-BEG}', } for key in set(keys) - set(names): # may not have them all... if key.startswith('Ex'): diff --git a/fail2ban/server/transmitter.py b/fail2ban/server/transmitter.py index 2f5be043..a0dfc639 100644 --- a/fail2ban/server/transmitter.py +++ b/fail2ban/server/transmitter.py @@ -303,7 +303,7 @@ class Transmitter: actionvalue = command[4] setattr(action, actionkey, actionvalue) return getattr(action, actionkey) - raise Exception("Invalid command (no set action or not yet implemented)") + raise Exception("Invalid command %r (no set action or not yet implemented)" % (command[1],)) def __commandGet(self, command): name = command[0] diff --git a/fail2ban/tests/action_d/test_badips.py b/fail2ban/tests/action_d/test_badips.py index 64db44e7..2f3b6723 100644 --- a/fail2ban/tests/action_d/test_badips.py +++ b/fail2ban/tests/action_d/test_badips.py @@ -32,6 +32,7 @@ if sys.version_info >= (2,7): # pragma: no cover - may be unavailable def setUp(self): """Call before every test case.""" + super(BadIPsActionTest, self).setUp() unittest.F2B.SkipIfNoNetwork() self.jail = DummyJail() diff --git a/fail2ban/tests/action_d/test_smtp.py b/fail2ban/tests/action_d/test_smtp.py index 5c8b1923..dc28d5c0 100644 --- a/fail2ban/tests/action_d/test_smtp.py +++ b/fail2ban/tests/action_d/test_smtp.py @@ -45,6 +45,7 @@ class SMTPActionTest(unittest.TestCase): def setUp(self): """Call before every test case.""" + super(SMTPActionTest, self).setUp() self.jail = DummyJail() pythonModule = os.path.join(CONFIG_DIR, "action.d", "smtp.py") pythonModuleName = os.path.basename(pythonModule.rstrip(".py")) diff --git a/fail2ban/tests/banmanagertestcase.py b/fail2ban/tests/banmanagertestcase.py index 4d964425..2c9d6601 100644 --- a/fail2ban/tests/banmanagertestcase.py +++ b/fail2ban/tests/banmanagertestcase.py @@ -32,6 +32,7 @@ from ..server.ticket import BanTicket class AddFailure(unittest.TestCase): def setUp(self): """Call before every test case.""" + super(AddFailure, self).setUp() self.__ticket = BanTicket('193.168.0.128', 1167605999.0) self.__banManager = BanManager() @@ -134,6 +135,7 @@ class AddFailure(unittest.TestCase): class StatusExtendedCymruInfo(unittest.TestCase): def setUp(self): """Call before every test case.""" + super(StatusExtendedCymruInfo, self).setUp() unittest.F2B.SkipIfNoNetwork() self.__ban_ip = "93.184.216.34" self.__asn = "15133" diff --git a/fail2ban/tests/clientbeautifiertestcase.py b/fail2ban/tests/clientbeautifiertestcase.py index 0390a8ff..7d20e84e 100644 --- a/fail2ban/tests/clientbeautifiertestcase.py +++ b/fail2ban/tests/clientbeautifiertestcase.py @@ -32,6 +32,7 @@ class BeautifierTest(unittest.TestCase): def setUp(self): """ Call before every test case """ + super(BeautifierTest, self).setUp() self.b = Beautifier() def tearDown(self): diff --git a/fail2ban/tests/clientreadertestcase.py b/fail2ban/tests/clientreadertestcase.py index e68523c2..36722ff1 100644 --- a/fail2ban/tests/clientreadertestcase.py +++ b/fail2ban/tests/clientreadertestcase.py @@ -55,6 +55,7 @@ class ConfigReaderTest(unittest.TestCase): def setUp(self): """Call before every test case.""" + super(ConfigReaderTest, self).setUp() self.d = tempfile.mkdtemp(prefix="f2b-temp") self.c = ConfigReaderUnshared(basedir=self.d) diff --git a/fail2ban/tests/config/filter.d/zzz-generic-example.conf b/fail2ban/tests/config/filter.d/zzz-generic-example.conf index df30d725..8d1f2ae9 100644 --- a/fail2ban/tests/config/filter.d/zzz-generic-example.conf +++ b/fail2ban/tests/config/filter.d/zzz-generic-example.conf @@ -20,3 +20,8 @@ failregex = ^%(__prefix_line)sF2B: failure from $ # just to test multiple ignoreregex: ignoreregex = ^%(__prefix_line)sF2B: error from 192.0.2.251$ ^%(__prefix_line)sF2B: error from 192.0.2.252$ + +# specify only exact date patterns, +1 with %%Y to test usage of last known date by wrong dates like 0000-00-00... +datepattern = {^LN-BEG}%%ExY(?P<_sep>[-/.])%%m(?P=_sep)%%d[T ]%%H:%%M:%%S(?:[.,]%%f)?(?:\s*%%z)? + {^LN-BEG}(?:%%a )?%%b %%d %%H:%%M:%%S(?:\.%%f)?(?: %%ExY)? + {^LN-BEG}%%Y(?P<_sep>[-/.])%%m(?P=_sep)%%d[T ]%%H:%%M:%%S(?:[.,]%%f)?(?:\s*%%z)? diff --git a/fail2ban/tests/datedetectortestcase.py b/fail2ban/tests/datedetectortestcase.py index 6bf92582..fdb7ec71 100644 --- a/fail2ban/tests/datedetectortestcase.py +++ b/fail2ban/tests/datedetectortestcase.py @@ -58,6 +58,8 @@ class DateDetectorTest(LogCaptureTestCase): return self.__datedetector def testGetEpochTime(self): + self.__datedetector = DateDetector() + self.__datedetector.appendTemplate('EPOCH') # correct epoch time, using all variants: for dateUnix in (1138049999, 32535244799): for date in ("%s", "[%s]", "[%s.555]", "audit(%s.555:101)"): @@ -67,7 +69,7 @@ class DateDetectorTest(LogCaptureTestCase): self.assertTrue(datelog, "Parse epoch time for %s failed" % (date,)) ( datelog, matchlog ) = datelog self.assertEqual(int(datelog), dateUnix) - self.assertIn(matchlog.group(), (str(dateUnix), str(dateUnix)+'.555')) + self.assertIn(matchlog.group(1), (str(dateUnix), str(dateUnix)+'.555')) # wrong, no epoch time (< 10 digits, more as 11 digits, begin/end of word) : for dateUnix in ('123456789', '9999999999999999', '1138049999A', 'A1138049999'): for date in ("%s", "[%s]", "[%s.555]", "audit(%s.555:101)"): @@ -85,7 +87,7 @@ class DateDetectorTest(LogCaptureTestCase): # of fail2ban -- we just ignore incorrect day of the week ( datelog, matchlog ) = self.datedetector.getTime(log) self.assertEqual(datelog, dateUnix) - self.assertEqual(matchlog.group(), 'Jan 23 21:59:59') + self.assertEqual(matchlog.group(1), 'Jan 23 21:59:59') def testVariousTimes(self): """Test detection of various common date/time formats f2b should understand @@ -150,7 +152,7 @@ class DateDetectorTest(LogCaptureTestCase): ( logUnix, logMatch ) = logtime self.assertEqual(logUnix, dateUnix, "getTime comparison failure for %s: by prefix %r \"%s\" is not \"%s\"" % (sdate, prefix, logUnix, dateUnix)) - self.assertEqual(logMatch.group(), rdate) + self.assertEqual(logMatch.group(1), rdate) else: self.assertEqual(logtime, None, "getTime should have not matched for %r by prefix %r Got: %s" % (sdate, prefix, logtime)) @@ -164,7 +166,7 @@ class DateDetectorTest(LogCaptureTestCase): ( logUnix, logMatch ) = logtime self.assertEqual(logUnix, dateUnix, "getTime comparison failure for %s by prefix %r: \"%s\" is not \"%s\"" % (sdate, prefix, logUnix, dateUnix)) - self.assertEqual(logMatch.group(), rdate) + self.assertEqual(logMatch.group(1), rdate) else: self.assertEqual(logtime, None, "getTime should have not matched for %r by prefix %r Got: %s" % (sdate, prefix, logtime)) @@ -183,23 +185,23 @@ class DateDetectorTest(LogCaptureTestCase): self.assertNotEqual(logdate, None) ( logTime, logMatch ) = logdate self.assertEqual(logTime, mu) - self.assertEqual(logMatch.group(), '2012/10/11 02:37:17') + self.assertEqual(logMatch.group(1), '2012/10/11 02:37:17') # confuse it with year being at the end for i in xrange(10): ( logTime, logMatch ) = self.datedetector.getTime('11/10/2012 02:37:17 [error] 18434#0') self.assertEqual(logTime, mu) - self.assertEqual(logMatch.group(), '11/10/2012 02:37:17') + self.assertEqual(logMatch.group(1), '11/10/2012 02:37:17') # and now back to the original ( logTime, logMatch ) = self.datedetector.getTime('2012/10/11 02:37:17 [error] 18434#0') self.assertEqual(logTime, mu) - self.assertEqual(logMatch.group(), '2012/10/11 02:37:17') + self.assertEqual(logMatch.group(1), '2012/10/11 02:37:17') def testDateTemplate(self): t = DateTemplate() t.setRegex('^a{3,5}b?c*$') - self.assertEqual(t.regex, '^a{3,5}b?c*$') + self.assertEqual(t.regex, '^(a{3,5}b?c*)$') self.assertRaises(Exception, t.getDate, '') - self.assertEqual(t.matchDate('aaaac').group(), 'aaaac') + self.assertEqual(t.matchDate('aaaac').group(1), 'aaaac') ## no word boundaries left and right: t = DatePatternRegex() @@ -208,22 +210,22 @@ class DateDetectorTest(LogCaptureTestCase): self.assertFalse('**' in t.regex) # match date: dt = 'TIME:20050102T010203' - self.assertEqual(t.matchDate('X' + dt + 'X').group(), dt) - self.assertEqual(t.matchDate(dt).group(), dt) + self.assertEqual(t.matchDate('X' + dt + 'X').group(1), dt) + self.assertEqual(t.matchDate(dt).group(1), dt) # wrong year (for exact %ExY): dt = 'TIME:50050102T010203' self.assertFalse(t.matchDate(dt)) - ## start boundary left and word boundary right: + ## start boundary left and word boundary right (automatically if not **): t = DatePatternRegex() - t.pattern = '%ExLBtime:%ExY%Exm%ExdT%ExH%ExM%ExS' + t.pattern = '{^LN-BEG}time:%ExY%Exm%ExdT%ExH%ExM%ExS' self.assertTrue('^' in t.regex) # try match date: dt = 'time:20050102T010203' self.assertFalse(t.matchDate('X' + dt)) self.assertFalse(t.matchDate(dt + 'X')) - self.assertEqual(t.matchDate('##' + dt + '...').group(), dt) - self.assertEqual(t.matchDate(dt).group(), dt) + self.assertEqual(t.matchDate('##' + dt + '...').group(1), dt) + self.assertEqual(t.matchDate(dt).group(1), dt) # case sensitive: dt = 'TIME:20050102T010203' self.assertFalse(t.matchDate(dt)) @@ -232,9 +234,9 @@ class DateDetectorTest(LogCaptureTestCase): t = DatePatternRegex() t.pattern = '^%Y %b %d' self.assertTrue('(?iu)' in t.regex) - dt = '2005 jun 03'; self.assertEqual(t.matchDate(dt).group(), dt) - dt = '2005 Jun 03'; self.assertEqual(t.matchDate(dt).group(), dt) - dt = '2005 JUN 03'; self.assertEqual(t.matchDate(dt).group(), dt) + dt = '2005 jun 03'; self.assertEqual(t.matchDate(dt).group(1), dt) + dt = '2005 Jun 03'; self.assertEqual(t.matchDate(dt).group(1), dt) + dt = '2005 JUN 03'; self.assertEqual(t.matchDate(dt).group(1), dt) def testAmbiguousInOrderedTemplates(self): dd = self.datedetector @@ -259,7 +261,7 @@ class DateDetectorTest(LogCaptureTestCase): logSys.debug('Line: %s', line) match, template = dd.matchTime(line) self.assertTrue(match) - self.assertEqual(match.group(), debit) + self.assertEqual(match.group(1), debit) def testLowLevelLogging(self): # test coverage for the deep (heavy) debug messages: @@ -363,31 +365,30 @@ class CustomDateFormatsTest(unittest.TestCase): ('200333 010203', r'%Y%m%d %H%M%S', "text:200333 010203 | date:20031230 010203"), ('20031230 010203', r'%ExY%Exm%Exd %ExH%ExM%ExS', "text:200333 010203 | date:20031230 010203"), ('20031230 010203', None, "text:200333 010203 | date:20031230 010203"), - # Explicit bound in start of the line using %ExLB key, + # Explicit bound in start of the line using {^LN-BEG} key, # (negative) in the 1st case without line begin boundary - wrong date may be found, # (positive) in the 2nd case with line begin boundary - unexpected date / log line (not found) # (positive) and in 3th case with line begin boundary - find the correct date ("20030101 000000", "%ExY%Exm%Exd %ExH%ExM%ExS", "00001230 010203 - 20030101 000000"), - (None, "%ExLB%ExY%Exm%Exd %ExH%ExM%ExS", "00001230 010203 - 20030101 000000"), - ("20031230 010203", "%ExLB%ExY%Exm%Exd %ExH%ExM%ExS", "20031230 010203 - 20030101 000000"), - # Explicit bound in start of the line using %ExLB key, + (None, "{^LN-BEG}%ExY%Exm%Exd %ExH%ExM%ExS", "00001230 010203 - 20030101 000000"), + ("20031230 010203", "{^LN-BEG}%ExY%Exm%Exd %ExH%ExM%ExS", "20031230 010203 - 20030101 000000"), + # Explicit bound in start of the line using {^LN-BEG} key, # up to 2 non-alphanumeric chars front, ** - no word boundary on the right - ("20031230010203", "%ExLB%ExY%Exm%Exd%ExH%ExM%ExS**", "2003123001020320030101000000"), - ("20031230010203", "%ExLB%ExY%Exm%Exd%ExH%ExM%ExS**", "#2003123001020320030101000000"), - ("20031230010203", "%ExLB%ExY%Exm%Exd%ExH%ExM%ExS**", "##2003123001020320030101000000"), - ("20031230010203", "%ExLB%ExY%Exm%Exd%ExH%ExM%ExS", "[20031230010203]20030101000000"), + ("20031230010203", "{^LN-BEG}%ExY%Exm%Exd%ExH%ExM%ExS**", "2003123001020320030101000000"), + ("20031230010203", "{^LN-BEG}%ExY%Exm%Exd%ExH%ExM%ExS**", "#2003123001020320030101000000"), + ("20031230010203", "{^LN-BEG}%ExY%Exm%Exd%ExH%ExM%ExS**", "##2003123001020320030101000000"), + ("20031230010203", "{^LN-BEG}%ExY%Exm%Exd%ExH%ExM%ExS", "[20031230010203]20030101000000"), ): logSys.debug('== test: %r', (matched, dp, line)) if dp is None: dd = defDD else: - dp = DatePatternRegex(dp) dd = DateDetector() dd.appendTemplate(dp) date = dd.getTime(line) if matched: self.assertTrue(date) - self.assertEqual(matched, date[1].group()) + self.assertEqual(matched, date[1].group(1)) else: self.assertEqual(date, None) diff --git a/fail2ban/tests/fail2banclienttestcase.py b/fail2ban/tests/fail2banclienttestcase.py index e212ff23..8e25fd77 100644 --- a/fail2ban/tests/fail2banclienttestcase.py +++ b/fail2ban/tests/fail2banclienttestcase.py @@ -743,6 +743,7 @@ class Fail2banServerTest(Fail2banClientServerBase): "maxretry = 3", "findtime = 10m", "failregex = ^\s*failure (401|403) from ", + "datepattern = {^LN-BEG}EPOCH", "", "[test-jail1]", "backend = " + backend, "filter =", "action = ", diff --git a/fail2ban/tests/fail2banregextestcase.py b/fail2ban/tests/fail2banregextestcase.py index c62377a9..5767a3f7 100644 --- a/fail2ban/tests/fail2banregextestcase.py +++ b/fail2ban/tests/fail2banregextestcase.py @@ -101,6 +101,7 @@ class Fail2banRegexTest(LogCaptureTestCase): def testWrongIngnoreRE(self): (opts, args, fail2banRegex) = _Fail2banRegex( + "--datepattern", "{^LN-BEG}EPOCH", "test", r".*? from $", r".**" ) self.assertFalse(fail2banRegex.start(opts, args)) @@ -108,6 +109,7 @@ class Fail2banRegexTest(LogCaptureTestCase): def testDirectFound(self): (opts, args, fail2banRegex) = _Fail2banRegex( + "--datepattern", "^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?", "--print-all-matched", "--print-no-missed", "Dec 31 11:59:59 [sshd] error: PAM: Authentication failure for kevin from 192.0.2.0", r"Authentication failure for .*? from $" @@ -136,6 +138,7 @@ class Fail2banRegexTest(LogCaptureTestCase): def testDirectRE_1(self): (opts, args, fail2banRegex) = _Fail2banRegex( + "--datepattern", "^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?", "--print-all-matched", Fail2banRegexTest.FILENAME_01, Fail2banRegexTest.RE_00 @@ -151,6 +154,7 @@ class Fail2banRegexTest(LogCaptureTestCase): def testDirectRE_1raw(self): (opts, args, fail2banRegex) = _Fail2banRegex( + "--datepattern", "^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?", "--print-all-matched", "--raw", Fail2banRegexTest.FILENAME_01, Fail2banRegexTest.RE_00 @@ -160,6 +164,7 @@ class Fail2banRegexTest(LogCaptureTestCase): def testDirectRE_1raw_noDns(self): (opts, args, fail2banRegex) = _Fail2banRegex( + "--datepattern", "^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?", "--print-all-matched", "--raw", "--usedns=no", Fail2banRegexTest.FILENAME_01, Fail2banRegexTest.RE_00 @@ -169,6 +174,7 @@ class Fail2banRegexTest(LogCaptureTestCase): def testDirectRE_2(self): (opts, args, fail2banRegex) = _Fail2banRegex( + "--datepattern", "^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?", "--print-all-matched", Fail2banRegexTest.FILENAME_02, Fail2banRegexTest.RE_00 @@ -178,6 +184,7 @@ class Fail2banRegexTest(LogCaptureTestCase): def testVerbose(self): (opts, args, fail2banRegex) = _Fail2banRegex( + "--datepattern", "^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?", "--verbose", "--verbose-date", "--print-no-missed", Fail2banRegexTest.FILENAME_02, Fail2banRegexTest.RE_00 @@ -190,6 +197,7 @@ class Fail2banRegexTest(LogCaptureTestCase): def testWronChar(self): (opts, args, fail2banRegex) = _Fail2banRegex( + "--datepattern", "^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?", Fail2banRegexTest.FILENAME_WRONGCHAR, Fail2banRegexTest.FILTER_SSHD ) self.assertTrue(fail2banRegex.start(opts, args)) @@ -203,6 +211,7 @@ class Fail2banRegexTest(LogCaptureTestCase): def testWronCharDebuggex(self): (opts, args, fail2banRegex) = _Fail2banRegex( + "--datepattern", "^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?", "--debuggex", "--print-all-matched", Fail2banRegexTest.FILENAME_WRONGCHAR, Fail2banRegexTest.FILTER_SSHD ) diff --git a/fail2ban/tests/failmanagertestcase.py b/fail2ban/tests/failmanagertestcase.py index 6e7bf367..ed68e2a8 100644 --- a/fail2ban/tests/failmanagertestcase.py +++ b/fail2ban/tests/failmanagertestcase.py @@ -36,6 +36,7 @@ class AddFailure(unittest.TestCase): def setUp(self): """Call before every test case.""" + super(AddFailure, self).setUp() self.__items = None self.__failManager = FailManager() diff --git a/fail2ban/tests/files/logs/zzz-generic-example b/fail2ban/tests/files/logs/zzz-generic-example index 51d3974c..2044c387 100644 --- a/fail2ban/tests/files/logs/zzz-generic-example +++ b/fail2ban/tests/files/logs/zzz-generic-example @@ -30,8 +30,8 @@ Jun 21 16:55:02 machine kernel: [ 970.699396] @vserver_demo test- # failJSON: { "time": "2005-06-21T16:55:03", "match": true , "host": "192.0.2.3" } [Jun 21 16:55:03] machine kernel: [ 970.699396] @vserver_demo test-demo(pam_unix)[13709] [ID 255 test] F2B: failure from 192.0.2.3 -# -- wrong time direct in journal-line (using precise year pattern): -# failJSON: { "match": false} +# -- wrong time direct in journal-line (used last known date): +# failJSON: { "time": "2005-06-21T16:55:03", "match": true , "host": "192.0.2.1" } 0000-12-30 00:00:00 server test-demo[47831]: F2B: failure from 192.0.2.1 # -- wrong time after newline in message (plist without escaped newlines): # failJSON: { "match": false } @@ -42,8 +42,8 @@ Jun 22 20:37:04 server test-demo[402]: writeToStorage plist={ applicationDate = "0000-12-30 00:00:00 +0000"; # failJSON: { "match": false } } -# -- wrong time direct in journal-line (using precise year pattern): -# failJSON: { "match": false} +# -- wrong time direct in journal-line (used last known date): +# failJSON: { "time": "2005-06-22T20:37:04", "match": true , "host": "192.0.2.2" } 0000-12-30 00:00:00 server test-demo[47831]: F2B: failure from 192.0.2.2 # failJSON: { "time": "2005-06-21T16:56:02", "match": true , "host": "192.0.2.250" } diff --git a/fail2ban/tests/filtertestcase.py b/fail2ban/tests/filtertestcase.py index ea996072..227050f2 100644 --- a/fail2ban/tests/filtertestcase.py +++ b/fail2ban/tests/filtertestcase.py @@ -270,6 +270,7 @@ def _copy_lines_to_journal(in_, fields={},n=None, skip=0, terminal_line=""): # p class BasicFilter(unittest.TestCase): def setUp(self): + super(BasicFilter, self).setUp() self.filter = Filter('name') def testGetSetUseDNS(self): @@ -363,6 +364,7 @@ class IgnoreIP(LogCaptureTestCase): setUpMyTime() self.filter.addIgnoreIP('192.168.1.0/25') self.filter.addFailRegex('') + self.filter.setDatePattern('{^LN-BEG}EPOCH') self.filter.processLineAndAdd('1387203300.222 192.168.1.32') self.assertLogged('Ignore 192.168.1.32') tearDownMyTime() @@ -461,6 +463,7 @@ class LogFileFilterPoll(unittest.TestCase): def setUp(self): """Call before every test case.""" + super(LogFileFilterPoll, self).setUp() self.filter = FilterPoll(DummyJail()) self.filter.addLogPath(LogFileFilterPoll.FILENAME) @@ -653,6 +656,8 @@ class LogFileMonitor(LogCaptureTestCase): self.assertLogged('Unable to open %s' % self.name) def testErrorProcessLine(self): + # speedup search using exact date pattern: + self.filter.setDatePattern('^%ExY-%Exm-%Exd %ExH:%ExM:%ExS') self.filter.sleeptime /= 1000.0 ## produce error with not callable processLine: _org_processLine = self.filter.processLine @@ -715,6 +720,8 @@ class LogFileMonitor(LogCaptureTestCase): pass def testNewChangeViaGetFailures_simple(self): + # speedup search using exact date pattern: + self.filter.setDatePattern('^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?') # suck in lines from this sample log file self.filter.getFailures(self.name) self.assertRaises(FailManagerEmpty, self.filter.failManager.toBan) @@ -730,6 +737,8 @@ class LogFileMonitor(LogCaptureTestCase): _assert_correct_last_attempt(self, self.filter, GetFailures.FAILURES_01) def testNewChangeViaGetFailures_rewrite(self): + # speedup search using exact date pattern: + self.filter.setDatePattern('^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?') # # if we rewrite the file at once self.file.close() @@ -748,6 +757,8 @@ class LogFileMonitor(LogCaptureTestCase): _assert_correct_last_attempt(self, self.filter, GetFailures.FAILURES_01) def testNewChangeViaGetFailures_move(self): + # speedup search using exact date pattern: + self.filter.setDatePattern('^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?') # # if we move file into a new location while it has been open already self.file.close() @@ -769,6 +780,7 @@ class CommonMonitorTestCase(unittest.TestCase): def setUp(self): """Call before every test case.""" + super(CommonMonitorTestCase, self).setUp() self._failTotal = 0 def waitFailTotal(self, count, delay=1.): @@ -819,6 +831,8 @@ def get_monitor_failures_testcase(Filter_): self.jail = DummyJail() self.filter = Filter_(self.jail) self.filter.addLogPath(self.name, autoSeek=False) + # speedup search using exact date pattern: + self.filter.setDatePattern('^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?') self.filter.active = True self.filter.addFailRegex("(?:(?:Authentication failure|Failed [-/\w+]+) for(?: [iI](?:llegal|nvalid) user)?|[Ii](?:llegal|nvalid) user|ROOT LOGIN REFUSED) .*(?: from|FROM) ") self.filter.start() @@ -1223,6 +1237,8 @@ class GetFailures(LogCaptureTestCase): self.jail = DummyJail() self.filter = FileFilter(self.jail) self.filter.active = True + # speedup search using exact date pattern: + self.filter.setDatePattern('^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?') # TODO Test this #self.filter.setTimeRegex("\S{3}\s{1,2}\d{1,2} \d{2}:\d{2}:\d{2}") #self.filter.setTimePattern("%b %d %H:%M:%S") @@ -1329,6 +1345,11 @@ class GetFailures(LogCaptureTestCase): output = (('212.41.96.186', 4, 1124013600.0), ('212.41.96.185', 2, 1124013598.0)) + # speedup search using exact date pattern: + self.filter.setDatePattern(('^%ExY(?P<_sep>[-/.])%m(?P=_sep)%d[T ]%H:%M:%S(?:[.,]%f)?(?:\s*%z)?', + '^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?', + '^EPOCH' + )) self.filter.setMaxRetry(2) self.filter.addLogPath(GetFailures.FILENAME_04, autoSeek=0) self.filter.addFailRegex("Invalid user .* ") @@ -1358,6 +1379,8 @@ class GetFailures(LogCaptureTestCase): if enc is not None: self.tearDown();self.setUp(); self.filter.setLogEncoding(enc); + # speedup search using exact date pattern: + self.filter.setDatePattern('^%ExY-%Exm-%Exd %ExH:%ExM:%ExS') self.assertNotLogged('Error decoding line'); self.filter.addLogPath(fname) self.filter.addFailRegex(failregex) @@ -1533,6 +1556,7 @@ class DNSUtilsNetworkTests(unittest.TestCase): def setUp(self): """Call before every test case.""" + super(DNSUtilsNetworkTests, self).setUp() unittest.F2B.SkipIfNoNetwork() def test_IPAddr(self): diff --git a/fail2ban/tests/misctestcase.py b/fail2ban/tests/misctestcase.py index 908e4f6c..cd841a0f 100644 --- a/fail2ban/tests/misctestcase.py +++ b/fail2ban/tests/misctestcase.py @@ -86,6 +86,7 @@ def _getSysPythonVersion(): class SetupTest(unittest.TestCase): def setUp(self): + super(SetupTest, self).setUp() unittest.F2B.SkipIfFast() setup = os.path.join(os.path.dirname(__file__), '..', '..', 'setup.py') self.setup = os.path.exists(setup) and setup or None diff --git a/fail2ban/tests/samplestestcase.py b/fail2ban/tests/samplestestcase.py index 31b1812e..0ed3e764 100644 --- a/fail2ban/tests/samplestestcase.py +++ b/fail2ban/tests/samplestestcase.py @@ -43,6 +43,7 @@ class FilterSamplesRegex(unittest.TestCase): def setUp(self): """Call before every test case.""" + super(FilterSamplesRegex, self).setUp() self.filter = Filter(None) self.filter.active = True diff --git a/fail2ban/tests/servertestcase.py b/fail2ban/tests/servertestcase.py index cd7ce5c9..03fd1a58 100644 --- a/fail2ban/tests/servertestcase.py +++ b/fail2ban/tests/servertestcase.py @@ -65,7 +65,7 @@ class TransmitterBase(unittest.TestCase): def setUp(self): """Call before every test case.""" - #super(TransmitterBase, self).setUp() + super(TransmitterBase, self).setUp() self.transm = self.server._Server__transm # To test thransmitter we don't need to start server... #self.server.start('/dev/null', '/dev/null', force=False) @@ -301,9 +301,11 @@ class Transmitter(TransmitterBase): ("%%%Y%m%d%H%M%S", "{*WD-BEG}%YearMonthDay24hourMinuteSecond{*WD-END}"), jail=self.jailName) self.setGetTest( - "datepattern", "Epoch", (None, "Epoch"), jail=self.jailName) + "datepattern", "Epoch", (None, "Epoch{*WD-END}"), jail=self.jailName) self.setGetTest( - "datepattern", "TAI64N", (None, "TAI64N"), jail=self.jailName) + "datepattern", "^Epoch", (None, "{^LN-BEG}Epoch{*WD-END}"), jail=self.jailName) + self.setGetTest( + "datepattern", "TAI64N", (None, "TAI64N{*WD-END}"), jail=self.jailName) self.setGetTestNOK("datepattern", "%Cat%a%%%g", jail=self.jailName) def testJailUseDNS(self): diff --git a/fail2ban/tests/sockettestcase.py b/fail2ban/tests/sockettestcase.py index 5bf0be57..1a94a952 100644 --- a/fail2ban/tests/sockettestcase.py +++ b/fail2ban/tests/sockettestcase.py @@ -41,6 +41,7 @@ class Socket(unittest.TestCase): def setUp(self): """Call before every test case.""" + super(Socket, self).setUp() self.server = AsyncServer(self) sock_fd, sock_name = tempfile.mkstemp('fail2ban.sock', 'socket') os.close(sock_fd) diff --git a/fail2ban/tests/utils.py b/fail2ban/tests/utils.py index 22079456..fc60f87b 100644 --- a/fail2ban/tests/utils.py +++ b/fail2ban/tests/utils.py @@ -526,6 +526,16 @@ if True: ## if not hasattr(unittest.TestCase, 'assertIn'): self.fail(msg) unittest.TestCase.assertNotIn = assertNotIn +_org_setUp = unittest.TestCase.setUp +def _customSetUp(self): + # print('=='*10, self) + if unittest.F2B.log_level <= logging.DEBUG: # so if DEBUG etc -- show them (and log it in travis)! + print("") + logSys.debug('='*10 + ' %s ' + '='*20, self.id()) + _org_setUp(self) + +unittest.TestCase.setUp = _customSetUp + class LogCaptureTestCase(unittest.TestCase): @@ -601,12 +611,11 @@ class LogCaptureTestCase(unittest.TestCase): # Let's log everything into a string self._log = LogCaptureTestCase._MemHandler(unittest.F2B.log_lazy) logSys.handlers = [self._log] - if self._old_level <= logging.DEBUG: # so if DEBUG etc -- show them (and log it in travis)! - print("") + if self._old_level <= logging.DEBUG: logSys.handlers += self._old_handlers - logSys.debug('='*10 + ' %s ' + '='*20, self.id()) - else: + else: # lowest log level to capture messages logSys.setLevel(logging.DEBUG) + super(LogCaptureTestCase, self).setUp() def tearDown(self): """Call after every test case.""" @@ -615,6 +624,7 @@ class LogCaptureTestCase(unittest.TestCase): logSys = getLogger("fail2ban") logSys.handlers = self._old_handlers logSys.level = self._old_level + super(LogCaptureTestCase, self).tearDown() def _is_logged(self, *s, **kwargs): logged = self._log.getvalue() From faee5f1fdc4a91dd39dff230ee0aea2d0fefcf70 Mon Sep 17 00:00:00 2001 From: sebres Date: Fri, 7 Oct 2016 18:33:01 +0200 Subject: [PATCH 11/19] better caching (thereby better performance), better recognition of similar regex --- config/filter.d/ejabberd-auth.conf | 5 +- fail2ban/server/datedetector.py | 101 ++++++++++++++++++----------- fail2ban/server/datetemplate.py | 5 +- fail2ban/tests/servertestcase.py | 8 +-- 4 files changed, 73 insertions(+), 46 deletions(-) diff --git a/config/filter.d/ejabberd-auth.conf b/config/filter.d/ejabberd-auth.conf index edd87463..56517489 100644 --- a/config/filter.d/ejabberd-auth.conf +++ b/config/filter.d/ejabberd-auth.conf @@ -34,4 +34,7 @@ maxlines = 2 # journalmatch = -datepattern = ^(?:=[^=]+={3,} )?({DATE}) +#datepattern = ^(?:=[^=]+={3,} )?({DATE}) +# explicit time format using prefix =...==== and no date in second string begins with I(...)... +datepattern = ^(?:=[^=]+={3,} )?(%%ExY(?P<_sep>[-/.])%%m(?P=_sep)%%d[T ]%%H:%%M:%%S(?:[.,]%%f)?(?:\s*%%z)?) + ^I\(()** diff --git a/fail2ban/server/datedetector.py b/fail2ban/server/datedetector.py index cced6d91..458f8d11 100644 --- a/fail2ban/server/datedetector.py +++ b/fail2ban/server/datedetector.py @@ -36,6 +36,46 @@ logSys = getLogger(__name__) logLevel = 6 RE_DATE_PREMATCH = re.compile("\{DATE\}", re.IGNORECASE) +DD_patternCache = Utils.Cache(maxCount=1000, maxTime=60*60) + + +def _getPatternTemplate(pattern, key=None): + if key is None: + key = pattern + if '%' not in pattern: + key = pattern.upper() + template = DD_patternCache.get(key) + + if not template: + if key in ("EPOCH", "{^LN-BEG}EPOCH", "^EPOCH"): + template = DateEpoch(lineBeginOnly=(key != "EPOCH")) + elif key in ("TAI64N", "{^LN-BEG}TAI64N", "^TAI64N"): + template = DateTai64n(wordBegin=('start' if key != "TAI64N" else False)) + else: + template = DatePatternRegex(pattern) + + DD_patternCache.set(key, template) + return template + +def _getAnchoredTemplate(template, wrap=lambda s: '{^LN-BEG}' + s): + # wrap name: + name = wrap(template.name) + # try to find in cache (by name): + template2 = DD_patternCache.get(name) + if not template2: + # wrap pattern (or regexp if not pattern template): + regex = wrap(getattr(template, 'pattern', template.regex)) + if hasattr(template, 'pattern'): + # try to find in cache (by pattern): + template2 = DD_patternCache.get(regex) + # make duplicate and set new anchored regex: + if not template2: + if not hasattr(template, 'pattern'): + template2 = _getPatternTemplate(name) + else: + template2 = _getPatternTemplate(regex) + return template2 + class DateDetectorCache(object): @@ -57,22 +97,20 @@ class DateDetectorCache(object): self._addDefaultTemplate() return self.__templates - def _cacheTemplate(self, template, lineBeginOnly=False): + def _cacheTemplate(self, template): """Cache Fail2Ban's default template. """ if isinstance(template, str): - # exact given template with word benin-end boundary: - if not lineBeginOnly: - template = DatePatternRegex(template) - else: - template = DatePatternRegex(template, wordBegin='start') - # additional template, that prefers datetime at start of a line (safety+performance feature): - if not lineBeginOnly and hasattr(template, 'regex'): - template2 = copy.copy(template) - regex = getattr(template, 'pattern', template.regex) - template2.setRegex(regex, wordBegin='start', wordEnd=True) - if template2.name != template.name: + # exact given template with word begin-end boundary: + template = _getPatternTemplate(template) + # if not already line-begin anchored, additional template, that prefers datetime + # at start of a line (safety+performance feature): + name = template.name + if not name.startswith('{^LN-BEG}') and not name.startswith('^') and hasattr(template, 'regex'): + template2 = _getAnchoredTemplate(template) + # prevent to add duplicates: + if template2.name != name: # increase weight of such templates, because they should be always # preferred in template sorting process (bubble up): template2.weight = 100.0 @@ -114,18 +152,17 @@ class DateDetectorCache(object): # with previous ("%d-%m-%ExY %H:%M:%S" by "%d(?P<_sep>[-/])%m(?P=_sep)(?:%ExY|%Exy) %H:%M:%S") self._cacheTemplate("%m-%d-%ExY %H:%M:%S(?:\.%f)?") # Epoch - self._cacheTemplate(DateEpoch(lineBeginOnly=True), lineBeginOnly=True) - self._cacheTemplate(DateEpoch()) + self._cacheTemplate('EPOCH') # Only time information in the log - self._cacheTemplate("%H:%M:%S", lineBeginOnly=True) + self._cacheTemplate("{^LN-BEG}%H:%M:%S") # <09/16/08@05:03:30> - self._cacheTemplate("<%m/%d/%Exy@%H:%M:%S>", lineBeginOnly=True) + self._cacheTemplate("^<%m/%d/%Exy@%H:%M:%S>") # MySQL: 130322 11:46:11 self._cacheTemplate("%Exy%Exm%Exd ?%H:%M:%S") # Apache Tomcat self._cacheTemplate("%b %d, %ExY %I:%M:%S %p") # ASSP: Apr-27-13 02:33:06 - self._cacheTemplate("%b-%d-%Exy %H:%M:%S", lineBeginOnly=True) + self._cacheTemplate("^%b-%d-%Exy %H:%M:%S") # 20050123T215959, 20050123 215959 self._cacheTemplate("%ExY%Exm%Exd[T ]%ExH%ExM%ExS(?:[.,]%f)?(?:\s*%z)?") # prefixed with optional named time zone (monit): @@ -134,7 +171,7 @@ class DateDetectorCache(object): # +00:00 Jan 23 21:59:59.011 2005 self._cacheTemplate("(?:%z )?(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?") # TAI64N - self._cacheTemplate(DateTai64n()) + self._cacheTemplate("TAI64N") # self.__templates = self.__tmpcache[0] + self.__tmpcache[1] del self.__tmpcache @@ -171,7 +208,6 @@ class DateDetector(object): templates """ _defCache = DateDetectorCache() - _patternCache = Utils.Cache(maxCount=1000, maxTime=60*60) def __init__(self): self.__templates = list() @@ -215,18 +251,11 @@ class DateDetector(object): key = pattern = template if '%' not in pattern: key = pattern.upper() - template = DateDetector._patternCache.get(key) - + template = DD_patternCache.get(key) if not template: - if key in ("EPOCH", "{^LN-BEG}EPOCH", "^EPOCH"): - template = DateEpoch(lineBeginOnly=(key != "EPOCH")) - elif key in ("TAI64N", "{^LN-BEG}TAI64N", "^TAI64N"): - template = DateTai64n(wordBegin=('start' if key != "TAI64N" else False)) - elif key in ("{^LN-BEG}", "{*WD-BEG}", "{DEFAULT}"): + if key in ("{^LN-BEG}", "{DEFAULT}"): flt = \ - lambda template: template.flags & DateTemplate.LINE_BEGIN if key == "{^LN-BEG}" else \ - lambda template: template.flags & DateTemplate.WORD_BEGIN if key == "{*WD-BEG}" else \ - None + lambda template: template.flags & DateTemplate.LINE_BEGIN if key == "{^LN-BEG}" else None self.addDefaultTemplate(flt) return elif "{DATE}" in key: @@ -234,9 +263,9 @@ class DateDetector(object): lambda template: not template.flags & DateTemplate.LINE_BEGIN, pattern) return else: - template = DatePatternRegex(pattern) + template = _getPatternTemplate(pattern, key) - DateDetector._patternCache.set(key, template) + DD_patternCache.set(key, template) self._appendTemplate(template) logSys.info(" date pattern `%r`: `%s`", @@ -253,13 +282,9 @@ class DateDetector(object): if filterTemplate is not None and not filterTemplate(template): continue # if exact pattern available - create copy of template, contains replaced {DATE} with default regex: if preMatch is not None: - deftemplate = template - template = DateDetector._patternCache.get((preMatch, deftemplate.name)) - if not template: - regex = getattr(deftemplate, 'pattern', deftemplate.regex) - template = copy.copy(deftemplate) - template.setRegex(RE_DATE_PREMATCH.sub(regex, preMatch)) - DateDetector._patternCache.set((preMatch, deftemplate.name), template) + # get cached or create a copy with modified name/pattern, using preMatch replacement for {DATE}: + template = _getAnchoredTemplate(template, + wrap=lambda s: RE_DATE_PREMATCH.sub(s, preMatch)) # append date detector template (ignore duplicate if some was added before default): self._appendTemplate(template, ignoreDup=ignoreDup) diff --git a/fail2ban/server/datetemplate.py b/fail2ban/server/datetemplate.py index 3a9612aa..f670cbbd 100644 --- a/fail2ban/server/datetemplate.py +++ b/fail2ban/server/datetemplate.py @@ -115,15 +115,14 @@ class DateTemplate(object): self.flags |= DateTemplate.WORD_BEGIN if wordBegin != 'start' else DateTemplate.LINE_BEGIN if wordBegin != 'start': regex = r'(?:^|\b|\W)' + regex - self.name = '{*WD-BEG}' + self.name else: regex = r"^(?:\W{0,2})?" + regex - self.name = '{^LN-BEG}' + self.name + if not self.name.startswith('{^LN-BEG}'): + self.name = '{^LN-BEG}' + self.name # if word end boundary: if boundEnd: self.flags |= DateTemplate.WORD_END regex += r'(?=\b|\W|$)' - self.name += '{*WD-END}' if RE_LINE_BOUND_BEG.search(regex): self.flags |= DateTemplate.LINE_BEGIN if RE_LINE_BOUND_END.search(regex): self.flags |= DateTemplate.LINE_END # remove possible special pattern "**" in front and end of regex: diff --git a/fail2ban/tests/servertestcase.py b/fail2ban/tests/servertestcase.py index 03fd1a58..724e3bb9 100644 --- a/fail2ban/tests/servertestcase.py +++ b/fail2ban/tests/servertestcase.py @@ -298,14 +298,14 @@ class Transmitter(TransmitterBase): def testDatePattern(self): self.setGetTest("datepattern", "%%%Y%m%d%H%M%S", - ("%%%Y%m%d%H%M%S", "{*WD-BEG}%YearMonthDay24hourMinuteSecond{*WD-END}"), + ("%%%Y%m%d%H%M%S", "%YearMonthDay24hourMinuteSecond"), jail=self.jailName) self.setGetTest( - "datepattern", "Epoch", (None, "Epoch{*WD-END}"), jail=self.jailName) + "datepattern", "Epoch", (None, "Epoch"), jail=self.jailName) self.setGetTest( - "datepattern", "^Epoch", (None, "{^LN-BEG}Epoch{*WD-END}"), jail=self.jailName) + "datepattern", "^Epoch", (None, "{^LN-BEG}Epoch"), jail=self.jailName) self.setGetTest( - "datepattern", "TAI64N", (None, "TAI64N{*WD-END}"), jail=self.jailName) + "datepattern", "TAI64N", (None, "TAI64N"), jail=self.jailName) self.setGetTestNOK("datepattern", "%Cat%a%%%g", jail=self.jailName) def testJailUseDNS(self): From ffa97054120d487dd5a19b6f88d2ec565c45000b Mon Sep 17 00:00:00 2001 From: sebres Date: Mon, 17 Oct 2016 12:09:23 +0200 Subject: [PATCH 12/19] fixed UTC/GMT named time zone using `%Z` and `%z` patterns (special case with 0 zone offset); Currently still ignores another named zones, because fail2ban assumes that the given date is in the current default zone. Closes gh-1575 --- fail2ban/server/strptime.py | 8 ++++++-- fail2ban/tests/datedetectortestcase.py | 10 +++++++++- fail2ban/tests/files/logs/zzz-generic-example | 8 ++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/fail2ban/server/strptime.py b/fail2ban/server/strptime.py index 643984d3..cdfe0e0e 100644 --- a/fail2ban/server/strptime.py +++ b/fail2ban/server/strptime.py @@ -41,7 +41,7 @@ def _getYearCentRE(cent=(0,3), distance=3, now=(MyTime.now(), MyTime.alternateNo #todo: implement literal time zone support like CET, PST, PDT, etc (via pytz): #timeRE['z'] = r"%s?(?PZ|[+-]\d{2}(?::?[0-5]\d)?|[A-Z]{3})?" % timeRE['Z'] timeRE['Z'] = r"(?P[A-Z]{3,5})" -timeRE['z'] = r"(?PZ|[+-]\d{2}(?::?[0-5]\d)?)" +timeRE['z'] = r"(?PZ|UTC|GMT|[+-]\d{2}(?::?[0-5]\d)?)" # Extend build-in TimeRE with some exact patterns # exact two-digit patterns: @@ -183,7 +183,7 @@ def reGroupDictStrptime(found_dict, msec=False): week_of_year_start = 0 elif key == 'z': z = val - if z == "Z": + if z in ("Z", "UTC", "GMT"): tzoffset = 0 else: tzoffset = int(z[1:3]) * 60 # Hours... @@ -191,6 +191,10 @@ def reGroupDictStrptime(found_dict, msec=False): tzoffset += int(z[-2:]) # ...and minutes if z.startswith("-"): tzoffset = -tzoffset + elif key == 'Z': + z = val + if z in ("UTC", "GMT"): + tzoffset = 0 # Fail2Ban will assume it's this year assume_year = False diff --git a/fail2ban/tests/datedetectortestcase.py b/fail2ban/tests/datedetectortestcase.py index fdb7ec71..695f099c 100644 --- a/fail2ban/tests/datedetectortestcase.py +++ b/fail2ban/tests/datedetectortestcase.py @@ -378,6 +378,11 @@ class CustomDateFormatsTest(unittest.TestCase): ("20031230010203", "{^LN-BEG}%ExY%Exm%Exd%ExH%ExM%ExS**", "#2003123001020320030101000000"), ("20031230010203", "{^LN-BEG}%ExY%Exm%Exd%ExH%ExM%ExS**", "##2003123001020320030101000000"), ("20031230010203", "{^LN-BEG}%ExY%Exm%Exd%ExH%ExM%ExS", "[20031230010203]20030101000000"), + # UTC/GMT time zone offset (with %z and %Z): + (1072746123.0 - 3600, "{^LN-BEG}%ExY-%Exm-%Exd %ExH:%ExM:%ExS(?: %z)?", "[2003-12-30 01:02:03] server ..."), + (1072746123.0 - 3600, "{^LN-BEG}%ExY-%Exm-%Exd %ExH:%ExM:%ExS(?: %Z)?", "[2003-12-30 01:02:03] server ..."), + (1072746123.0, "{^LN-BEG}%ExY-%Exm-%Exd %ExH:%ExM:%ExS(?: %z)?", "[2003-12-30 01:02:03 UTC] server ..."), + (1072746123.0, "{^LN-BEG}%ExY-%Exm-%Exd %ExH:%ExM:%ExS(?: %Z)?", "[2003-12-30 01:02:03 UTC] server ..."), ): logSys.debug('== test: %r', (matched, dp, line)) if dp is None: @@ -388,7 +393,10 @@ class CustomDateFormatsTest(unittest.TestCase): date = dd.getTime(line) if matched: self.assertTrue(date) - self.assertEqual(matched, date[1].group(1)) + if isinstance(matched, basestring): + self.assertEqual(matched, date[1].group(1)) + else: + self.assertEqual(matched, date[0]) else: self.assertEqual(date, None) diff --git a/fail2ban/tests/files/logs/zzz-generic-example b/fail2ban/tests/files/logs/zzz-generic-example index 2044c387..d0c31740 100644 --- a/fail2ban/tests/files/logs/zzz-generic-example +++ b/fail2ban/tests/files/logs/zzz-generic-example @@ -46,6 +46,14 @@ Jun 22 20:37:04 server test-demo[402]: writeToStorage plist={ # failJSON: { "time": "2005-06-22T20:37:04", "match": true , "host": "192.0.2.2" } 0000-12-30 00:00:00 server test-demo[47831]: F2B: failure from 192.0.2.2 +# -- test no zone and UTC/GMT named zone "2005-06-21T14:55:10 UTC" == "2005-06-21T16:55:10 CEST" (diff +2h in CEST): +# failJSON: { "time": "2005-06-21T16:55:09", "match": true , "host": "192.0.2.09" } +2005-06-21 16:55:09 machine test-demo(pam_unix)[13709] F2B: error from 192.0.2.09 +# failJSON: { "time": "2005-06-21T16:55:10", "match": true , "host": "192.0.2.10" } +2005-06-21 14:55:10 UTC machine test-demo(pam_unix)[13709] F2B: error from 192.0.2.10 +# failJSON: { "time": "2005-06-21T16:55:11", "match": true , "host": "192.0.2.11" } +2005-06-21 14:55:11 GMT machine test-demo(pam_unix)[13709] F2B: error from 192.0.2.11 + # failJSON: { "time": "2005-06-21T16:56:02", "match": true , "host": "192.0.2.250" } [Jun 21 16:56:02] machine test-demo(pam_unix)[13709] F2B: error from 192.0.2.250 # failJSON: { "match": false, "desc": "test 1st ignoreregex" } From c8b036456dad258a4b9d38168530933bc02cc71c Mon Sep 17 00:00:00 2001 From: sebres Date: Mon, 17 Oct 2016 12:11:41 +0200 Subject: [PATCH 13/19] changelog entries --- ChangeLog | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/ChangeLog b/ChangeLog index 16c65556..ff728685 100644 --- a/ChangeLog +++ b/ChangeLog @@ -13,6 +13,15 @@ TODO: implementing of options resp. other tasks from PR #1346 ### Fixes * [Grave] memory leak's fixed (gh-1277, gh-1234) +* [Grave] Misleading date patterns defined more precisely (using extended syntax + `%Ex[mdHMS]` for exact two-digit match or e. g. `%ExY` as more precise year + pattern, within same century of last year and the next 3 years) +* [Grave] extends date detector template with distance (position of match in + log-line), to prevent grave collision using (re)ordered template list (e.g. + find-spot of wrong date-match inside foreign input, misleading date patterns + by ambiguous formats, etc.) +* Distance collision check always prefers template with shortest distance + (left for right) if date pattern is not anchored * Tricky bug fix: last position of log file will be never retrieved (gh-795), because of CASCADE all log entries will be deleted from logs table together with jail, if used "INSERT OR REPLACE" statement @@ -28,6 +37,11 @@ TODO: implementing of options resp. other tasks from PR #1346 * Pyinotify-backend: stability fix for sporadically errors in multi-threaded environment (without lock) * Fixed sporadically error in testCymruInfoNxdomain, because of unsorted values +* Fixed UTC/GMT named time zone, using `%Z` and `%z` patterns + (special case with 0 zone offset, see gh-1575) +* `filter.d/freeswitch.conf` + - Optional prefixes (server, daemon, dual time) if systemd daemon logs used (gh-1548) + - User part rewritten to accept IPv6 resp. domain after "@" (gh-1548) ### New Features * IPv6 support: @@ -118,6 +132,22 @@ fail2ban-client set loglevel INFO - new replacement for `` in opposition to ``, for separate usage of 2 address groups only (regardless of `usedns`), `ip4` and `ip6` together, without host (dns) +* More precise date template handling (WARNING: theoretically possible incompatibilities): + - datedetector rewritten more strict as earlier; + - default templates can be specified exacter using prefix/suffix syntax (via `datepattern`); + - more as one date pattern can be specified using option `datepattern` now + (new-line separated); + - some default options like `datepattern` can be specified directly in + section `[Definition]`, that avoids contrary usage of unnecessarily `[Init]` + section, because of performance (each extra section costs time); + - option `datepattern` can be specified in jail also (e. g. jails without filters + or custom log-format, new-line separated for multiple patterns); + - if first unnamed group specified in pattern, only this will be cut out from + search log-line (e. g.: `^date:[({DATE})]` will cut out only datetime match + pattern, and leaves `date:[] ...` for searching in filter); + - faster match and fewer searching of appropriate templates + (DateDetector.matchTime calls rarer DateTemplate.matchDate now); + - several standard filters extended with exact prefixed or anchored date templates; * fail2ban-testcases: - `assertLogged` extended with parameter wait (to wait up to specified timeout, before we throw assert exception) + test cases rewritten using that From 58717c185484e643b808c3c77feb2520d36e88f3 Mon Sep 17 00:00:00 2001 From: sebres Date: Mon, 17 Oct 2016 13:26:24 +0200 Subject: [PATCH 14/19] fail2ban-testcases: persistently set (python) time zone to CET during test cases process (used in zone-related test-cases) --- fail2ban/tests/utils.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fail2ban/tests/utils.py b/fail2ban/tests/utils.py index fc60f87b..ae543e3d 100644 --- a/fail2ban/tests/utils.py +++ b/fail2ban/tests/utils.py @@ -260,6 +260,10 @@ def initTests(opts): raise unittest.SkipTest('Skip test because of "--no-network"') unittest.F2B.SkipIfNoNetwork = F2B_SkipIfNoNetwork + # persistently set time zone to CET (used in zone-related test-cases), + # yoh: we need to adjust TZ to match the one used by Cyril so all the timestamps match + os.environ['TZ'] = 'Europe/Zurich' + time.tzset() # set alternate now for time related test cases: MyTime.setAlternateNow(TEST_NOW) @@ -292,17 +296,10 @@ old_TZ = os.environ.get('TZ', None) def setUpMyTime(): # Set the time to a fixed, known value # Sun Aug 14 12:00:00 CEST 2005 - # yoh: we need to adjust TZ to match the one used by Cyril so all the timestamps match - os.environ['TZ'] = 'Europe/Zurich' - time.tzset() MyTime.setTime(TEST_NOW) def tearDownMyTime(): - os.environ.pop('TZ') - if old_TZ: # pragma: no cover - os.environ['TZ'] = old_TZ - time.tzset() MyTime.myTime = None From a2cf34a64ef3348b284135e04659418d2a04ac5f Mon Sep 17 00:00:00 2001 From: sebres Date: Thu, 17 Nov 2016 21:04:51 +0100 Subject: [PATCH 15/19] code review: added endpos to found tuple, just to be safe by unpack --- fail2ban/server/datedetector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fail2ban/server/datedetector.py b/fail2ban/server/datedetector.py index 458f8d11..01cb43fe 100644 --- a/fail2ban/server/datedetector.py +++ b/fail2ban/server/datedetector.py @@ -359,7 +359,7 @@ class DateDetector(object): if not match: self.__lastTemplIdx = 0x7fffffff logSys.log(logLevel, " search template (%i) ...", len(self.__templates)) - found = None, 0x7fffffff, -1 + found = None, 0x7fffffff, 0x7fffffff, -1 i = 0 for ddtempl in self.__templates: if logSys.getEffectiveLevel() <= logLevel-1: From c06084d7d98ccd8a7ce28d18fc0f81402fc9752d Mon Sep 17 00:00:00 2001 From: sebres Date: Mon, 28 Nov 2016 11:04:37 +0100 Subject: [PATCH 16/19] _start_params - fix: symlinks should be absolute paths --- fail2ban/tests/fail2banclienttestcase.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fail2ban/tests/fail2banclienttestcase.py b/fail2ban/tests/fail2banclienttestcase.py index b64ecb38..e6cc46cb 100644 --- a/fail2ban/tests/fail2banclienttestcase.py +++ b/fail2ban/tests/fail2banclienttestcase.py @@ -138,8 +138,8 @@ def _start_params(tmp, use_stock=False, logtarget="/dev/null", db=":memory:"): """Filters list of 'files' to contain only directories (under dir)""" return [f for f in files if isdir(pjoin(dir, f))] shutil.copytree(STOCK_CONF_DIR, cfg, ignore=ig_dirs) - os.symlink(pjoin(STOCK_CONF_DIR, "action.d"), pjoin(cfg, "action.d")) - os.symlink(pjoin(STOCK_CONF_DIR, "filter.d"), pjoin(cfg, "filter.d")) + os.symlink(os.path.abspath(pjoin(STOCK_CONF_DIR, "action.d")), pjoin(cfg, "action.d")) + os.symlink(os.path.abspath(pjoin(STOCK_CONF_DIR, "filter.d")), pjoin(cfg, "filter.d")) # replace fail2ban params (database with memory): r = re.compile(r'^dbfile\s*=') for line in fileinput.input(pjoin(cfg, "fail2ban.conf"), inplace=True): @@ -424,7 +424,7 @@ class Fail2banClientTest(Fail2banClientServerBase): self.execSuccess(startparams, "-vvd") self.assertLogged("Loading files") self.assertLogged("logtarget") - + @with_tmpdir @with_kill_srv def testClientStartBackgroundInside(self, tmp): From 5d5ab274350035cd9899ab15652f97216dd5d759 Mon Sep 17 00:00:00 2001 From: sebres Date: Mon, 28 Nov 2016 13:17:36 +0100 Subject: [PATCH 17/19] small amend: removed unreachable code + coverage increase --- fail2ban/server/datetemplate.py | 2 -- fail2ban/tests/datedetectortestcase.py | 17 ++++++++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/fail2ban/server/datetemplate.py b/fail2ban/server/datetemplate.py index f670cbbd..1d0b014b 100644 --- a/fail2ban/server/datetemplate.py +++ b/fail2ban/server/datetemplate.py @@ -219,7 +219,6 @@ class DateEpoch(DateTemplate): if dateMatch: # extract part of format which represents seconds since epoch return (float(dateMatch.group(1)), dateMatch) - return None class DatePatternRegex(DateTemplate): @@ -338,4 +337,3 @@ class DateTai64n(DateTemplate): seconds_since_epoch = value[2:17] # convert seconds from HEX into local time stamp return (int(seconds_since_epoch, 16), dateMatch) - return None diff --git a/fail2ban/tests/datedetectortestcase.py b/fail2ban/tests/datedetectortestcase.py index 695f099c..15cb6f2d 100644 --- a/fail2ban/tests/datedetectortestcase.py +++ b/fail2ban/tests/datedetectortestcase.py @@ -132,14 +132,15 @@ class DateDetectorTest(LogCaptureTestCase): (True, True, "1106513999.000", None), # Regular epoch with millisec (True, True, "[1106513999.000]", "1106513999.000"), # epoch squared (brackets are not in match) (False, True, "audit(1106513999.000:987)", "1106513999.000"), # SELinux + (True, True, "no date line", None), # no date in string ): - logSys.debug('== test %r', (anchored, bound, sdate)) + if rdate is None and sdate != "no date line": rdate = sdate + logSys.debug('== test %r', (anchored, bound, sdate, rdate)) for should_match, prefix in ( - (True, ""), + (rdate is not None, ""), (not anchored, "bogus-prefix "), (False, "word-boundary") ): - if rdate is None: rdate = sdate log = prefix + sdate + "[sshd] error: PAM: Authentication failure" # if not allowed boundary test: if not bound and prefix == "word-boundary": continue @@ -283,6 +284,16 @@ class DateDetectorTest(LogCaptureTestCase): finally: datedetector.logLevel = self.__old_eff_level + def testWrongTemplate(self): + t = DatePatternRegex('(%ExY%Exm%Exd') + # lazy compiling used, so try match: + self.assertRaises(Exception, t.matchDate, '(20050101') + self.assertLogged("Compile %r failed" % t.name) + # abstract: + t = DateTemplate() + self.assertRaises(Exception, t.getDate, 'no date line') + + iso8601 = DatePatternRegex("%Y-%m-%d[T ]%H:%M:%S(?:\.%f)?%z") class CustomDateFormatsTest(unittest.TestCase): From 39c343bd0699ae98d49a98aeeefaa4a9ecc6347a Mon Sep 17 00:00:00 2001 From: sebres Date: Mon, 28 Nov 2016 15:18:31 +0100 Subject: [PATCH 18/19] better reorder templates handling, code coverage increase (a small part of _reorderTemplate was not covered at all) --- fail2ban/server/datedetector.py | 52 +++++++++++++------------- fail2ban/tests/datedetectortestcase.py | 4 +- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/fail2ban/server/datedetector.py b/fail2ban/server/datedetector.py index 01cb43fe..bef3be6e 100644 --- a/fail2ban/server/datedetector.py +++ b/fail2ban/server/datedetector.py @@ -92,7 +92,7 @@ class DateDetectorCache(object): if self.__templates: return self.__templates with self.__lock: - if self.__templates: + if self.__templates: # pragma: no cover - race-condition + multi-threaded environment only return self.__templates self._addDefaultTemplate() return self.__templates @@ -324,7 +324,7 @@ class DateDetector(object): ddtempl = self.__templates[i] template = ddtempl.template if template.flags & (DateTemplate.LINE_BEGIN|DateTemplate.LINE_END): - if logSys.getEffectiveLevel() <= logLevel-1: + if logSys.getEffectiveLevel() <= logLevel-1: # pragma: no cover - very-heavy debug logSys.log(logLevel-1, " try to match last anchored template #%02i ...", i) match = template.matchDate(line) ignoreBySearch = i @@ -452,7 +452,7 @@ class DateDetector(object): try: date = template.getDate(line, timeMatch[0]) if date is not None: - if logSys.getEffectiveLevel() <= logLevel: + if logSys.getEffectiveLevel() <= logLevel: # pragma: no cover - heavy debug logSys.log(logLevel, " got time %f for %r using template %s", date[0], date[1].group(1), template.name) return date @@ -478,29 +478,31 @@ class DateDetector(object): weight = ddtempl.weight ## try to move faster (first if unused available, or half of part to current template position): pos = self.__firstUnused if self.__firstUnused < num else num // 2 - pweight = templates[pos].weight ## don't move too often (multiline logs resp. log's with different date patterns), ## if template not used too long, replace it also : - if logSys.getEffectiveLevel() <= logLevel: - logSys.log(logLevel, " -> compare template #%02i & #%02i, weight %.3f > %.3f, hits %r > %r", - num, pos, weight, pweight, ddtempl.hits, templates[pos].hits) - if not pweight or weight > pweight or templates[pos].lastUsed < untime: - ## if not larger (and target position recently used) - move slow (exact 1 position): - if weight <= pweight and templates[pos].lastUsed > untime: - pos = num-1 - ## if still smaller and template at position used, don't move: - pweight = templates[pos].weight - if logSys.getEffectiveLevel() <= logLevel: - logSys.log(logLevel, " -> compare template #%02i & #%02i, weight %.3f > %.3f, hits %r > %r", - num, pos, weight, pweight, ddtempl.hits, templates[pos].hits) - if weight < pweight and templates[pos].lastUsed > untime: - return - del templates[num] - templates[pos:0] = [ddtempl] - ## correct first unused: - while self.__firstUnused < len(templates) and templates[self.__firstUnused].hits: - self.__firstUnused += 1 + def _moveable(): + pweight = templates[pos].weight if logSys.getEffectiveLevel() <= logLevel: - logSys.log(logLevel, " -> moved template #%02i -> #%02i", num, pos) - return pos + logSys.log(logLevel, " -> compare template #%02i & #%02i, weight %.3f > %.3f, hits %r > %r", + num, pos, weight, pweight, ddtempl.hits, templates[pos].hits) + return weight > pweight or untime > templates[pos].lastUsed + ## + ## if not moveable (smaller weight or target position recently used): + if not _moveable(): + ## try to move slow (exact 1 position): + if pos == num-1: + return num + pos = num-1 + ## if still smaller and template at position used, don't move: + if not _moveable(): + return num + ## move: + del templates[num] + templates[pos:0] = [ddtempl] + ## correct first unused: + while self.__firstUnused < len(templates) and templates[self.__firstUnused].hits: + self.__firstUnused += 1 + if logSys.getEffectiveLevel() <= logLevel: + logSys.log(logLevel, " -> moved template #%02i -> #%02i", num, pos) + return pos return num diff --git a/fail2ban/tests/datedetectortestcase.py b/fail2ban/tests/datedetectortestcase.py index 15cb6f2d..5b32a7e9 100644 --- a/fail2ban/tests/datedetectortestcase.py +++ b/fail2ban/tests/datedetectortestcase.py @@ -250,12 +250,12 @@ class DateDetectorTest(LogCaptureTestCase): ("030324 0:04:00", "server mysqld[1000]: 030324 0:04:00 [Warning] Access denied ..." " foreign-input just some free text 2003-03-07 17:05:01 test", 10), # distance collision detection (first date should be found): - ("Sep 16 21:30:26", "server mysqld[1020]: Sep 16 21:30:26 server mysqld: 030916 21:30:26 [Warning] Access denied", 10), + ("Sep 16 21:30:26", "server mysqld[1020]: Sep 16 21:30:26 server mysqld: 030916 21:30:26 [Warning] Access denied", 15), # just to test sorting: ("2005-10-07 06:09:42", "server mysqld[5906]: 2005-10-07 06:09:42 5907 [Warning] Access denied", 20), ("2005-10-08T15:26:18.237955", "server mysqld[5906]: 2005-10-08T15:26:18.237955 6 [Note] Access denied", 20), # date format changed again: - ("051009 10:05:30", "server mysqld[1000]: 051009 10:05:30 [Warning] Access denied ...", 20), + ("051009 10:05:30", "server mysqld[1000]: 051009 10:05:30 [Warning] Access denied ...", 50), ): logSys.debug('== test: %r', (debit, line, cnt)) for i in range(cnt): From 8018796b45b91e5cc8894978077b45234e299e6b Mon Sep 17 00:00:00 2001 From: sebres Date: Mon, 28 Nov 2016 17:17:48 +0100 Subject: [PATCH 19/19] wrong indentation (important code-piece in if log-level only) --- fail2ban/server/datedetector.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fail2ban/server/datedetector.py b/fail2ban/server/datedetector.py index bef3be6e..cccbf71a 100644 --- a/fail2ban/server/datedetector.py +++ b/fail2ban/server/datedetector.py @@ -326,8 +326,8 @@ class DateDetector(object): if template.flags & (DateTemplate.LINE_BEGIN|DateTemplate.LINE_END): if logSys.getEffectiveLevel() <= logLevel-1: # pragma: no cover - very-heavy debug logSys.log(logLevel-1, " try to match last anchored template #%02i ...", i) - match = template.matchDate(line) - ignoreBySearch = i + match = template.matchDate(line) + ignoreBySearch = i else: distance, endpos = self.__lastPos[0], self.__lastEndPos[0] if logSys.getEffectiveLevel() <= logLevel-1: @@ -357,7 +357,6 @@ class DateDetector(object): logSys.log(logLevel, " ** last pattern not found - pattern change, search ...") # search template and better match: if not match: - self.__lastTemplIdx = 0x7fffffff logSys.log(logLevel, " search template (%i) ...", len(self.__templates)) found = None, 0x7fffffff, 0x7fffffff, -1 i = 0 @@ -414,7 +413,7 @@ class DateDetector(object): self.__lastPos = distance, line[distance-1:distance] self.__lastEndPos = endpos, line[endpos:endpos+1] # if not first - try to reorder current template (bubble up), they will be not sorted anymore: - if i: + if i and i != self.__lastTemplIdx: i = self._reorderTemplate(i) self.__lastTemplIdx = i # return tuple with match and template reference used for parsing: