From 5c16ac3a8983ae8250ba25a99579d7096facbe37 Mon Sep 17 00:00:00 2001 From: Steven Hiscocks Date: Sun, 12 Jan 2014 18:59:31 +0000 Subject: [PATCH 1/2] ENH: Full regex for datepattern, utilising modified Python `_strptime` --- fail2ban/server/datedetector.py | 33 ++--- fail2ban/server/datetemplate.py | 47 +++--- fail2ban/server/filter.py | 7 +- fail2ban/server/strptime.py | 193 +++++++++++++++++++++++++ fail2ban/tests/datedetectortestcase.py | 41 ------ fail2ban/tests/filtertestcase.py | 2 +- man/jail.conf.5 | 10 +- 7 files changed, 230 insertions(+), 103 deletions(-) create mode 100644 fail2ban/server/strptime.py diff --git a/fail2ban/server/datedetector.py b/fail2ban/server/datedetector.py index 64123a34..e5c57a31 100644 --- a/fail2ban/server/datedetector.py +++ b/fail2ban/server/datedetector.py @@ -43,28 +43,24 @@ class DateDetector: self.__known_names.add(name) self.__templates.append(template) - def appendTemplate(self, template, **kwargs): + def appendTemplate(self, template): if isinstance(template, str): - template = DatePatternRegex(template, **kwargs) - else: - assert not kwargs + template = DatePatternRegex(template) DateDetector._appendTemplate(self, template) def addDefaultTemplate(self): self.__lock.acquire() try: - if sys.version_info >= (2, 5): # because of '%.f' - # asctime with subsecond: Sun Jan 23 21:59:59.011 2005 - self.appendTemplate("%a %b %d %H:%M:%S.%f %Y") + # asctime with subsecond: Sun Jan 23 21:59:59.011 2005 + self.appendTemplate("%a %b %d %H:%M:%S\.%f %Y") # asctime: Sun Jan 23 21:59:59 2005 self.appendTemplate("%a %b %d %H:%M:%S %Y") # asctime without year: Sun Jan 23 21:59:59 self.appendTemplate("%a %b %d %H:%M:%S") # standard: Jan 23 21:59:59 self.appendTemplate("%b %d %H:%M:%S") - if sys.version_info >= (2, 5): # because of '%.f' - # proftpd date: 2005-01-23 21:59:59,333 - self.appendTemplate("%Y-%m-%d %H:%M:%S,%f") + # proftpd date: 2005-01-23 21:59:59,333 + self.appendTemplate("%Y-%m-%d %H:%M:%S,%f") # simple date: 2005-01-23 
21:59:59 self.appendTemplate("%Y-%m-%d %H:%M:%S") # simple date: 2005/01/23 21:59:59 @@ -81,10 +77,9 @@ class DateDetector: # CPanel 05/20/2008:01:57:39 self.appendTemplate("%m/%d/%Y:%H:%M:%S") # custom for syslog-ng 2006.12.21 06:43:20 - self.appendTemplate("%Y.%m.%d %H:%M:%S") - if sys.version_info >= (2, 5): # because of '%.f' - # named 26-Jul-2007 15:20:52.252 - self.appendTemplate("%d-%b-%Y %H:%M:%S.%f") + self.appendTemplate("%Y\.%m\.%d %H:%M:%S") + # named 26-Jul-2007 15:20:52.252 + self.appendTemplate("%d-%b-%Y %H:%M:%S\.%f") # roundcube 26-Jul-2007 15:20:52 +0200 self.appendTemplate("%d-%b-%Y %H:%M:%S %z") # 26-Jul-2007 15:20:52 @@ -92,7 +87,7 @@ class DateDetector: # 17-07-2008 17:23:25 self.appendTemplate("%d-%m-%Y %H:%M:%S") # 01-27-2012 16:22:44.252 - self.appendTemplate("%m-%d-%Y %H:%M:%S.%f") + self.appendTemplate("%m-%d-%Y %H:%M:%S\.%f") # TAI64N template = DateTai64n() template.setName("TAI64N") @@ -106,15 +101,15 @@ class DateDetector: template.setName("ISO 8601") self.appendTemplate(template) # Only time information in the log - self.appendTemplate("%H:%M:%S", anchor=True) + self.appendTemplate("^%H:%M:%S") # <09/16/08@05:03:30> - self.appendTemplate("<%m/%d/%y@%H:%M:%S>", anchor=True) + self.appendTemplate("^<%m/%d/%y@%H:%M:%S>") # MySQL: 130322 11:46:11 - self.appendTemplate("%y%m%d %H:%M:%S", anchor=True) + self.appendTemplate("^%y%m%d ?%H:%M:%S") # Apache Tomcat self.appendTemplate("%b %d, %Y %I:%M:%S %p") # ASSP: Apr-27-13 02:33:06 - self.appendTemplate("%b-%d-%y %H:%M:%S", anchor=True) + self.appendTemplate("^%b-%d-%y %H:%M:%S") finally: self.__lock.release() diff --git a/fail2ban/server/datetemplate.py b/fail2ban/server/datetemplate.py index 729b951c..e5e9dc00 100644 --- a/fail2ban/server/datetemplate.py +++ b/fail2ban/server/datetemplate.py @@ -31,11 +31,12 @@ from datetime import timedelta from .mytime import MyTime from . 
import iso8601 +from .strptime import reGroupDictStrptime, timeRE logSys = logging.getLogger(__name__) -class DateTemplate: +class DateTemplate(object): def __init__(self): self.__name = "" @@ -55,7 +56,7 @@ class DateTemplate: if (wordBegin and not re.search(r'^\^', regex)): regex = r'\b' + regex self.__regex = regex - self.__cRegex = re.compile(regex, re.UNICODE) + self.__cRegex = re.compile(regex, re.UNICODE | re.IGNORECASE) def getRegex(self): return self.__regex @@ -230,44 +231,32 @@ except ValueError: DateStrptime._z = False class DatePatternRegex(DateStrptime): - _reEscape = r"([\\.^$*+?\(\){}\[\]|])" - _patternRE = r"%(%|[aAbBdfHIjmMpSUwWyYz])" + _patternRE = r"%%(%%|[%s])" % "".join(timeRE.keys()) _patternName = { 'a': "DAY", 'A': "DAYNAME", 'b': "MON", 'B': "MONTH", 'd': "Day", 'H': "24hour", 'I': "12hour", 'j': "Yearday", 'm': "Month", 'M': "Minute", 'p': "AMPM", 'S': "Second", 'U': "Yearweek", 'w': "Weekday", 'W': "Yearweek", 'y': 'Year2', 'Y': "Year", '%': "%", - 'z': "Zone offset", 'f': "Microseconds" } - _patternRegex = { - 'a': r"\w{3}", 'A': r"\w+", 'b': r"\w{3}", 'B': r"\w+", - 'd': r"(?:3[0-1]|[1-2]\d|[ 0]?\d)", - 'f': r"(?P<_f>\d{1,6})", 'H': r"(?:2[0-3]|1\d|[ 0]?\d)", - 'I': r"(?:1[0-2]|[ 0]?\d)", - 'j': r"(?:36[0-6]3[0-5]\d|[1-2]\d\d|[ 0]?\d\d|[ 0]{0,2}\d)", - 'm': r"(?:1[0-2]|[ 0]?[1-9])", 'M': r"[0-5]\d", 'p': r"[AP]M", - 'S': r"(?:6[01]|[0-5]\d)", 'U': r"(?:5[0-3]|[1-4]\d|[ 0]?\d)", - 'w': r"[0-6]", 'W': r"(?:5[0-3]|[ 0]?\d)", 'y': r"\d{2}", - 'Y': r"\d{4}", - 'z': r"(?P<_z>[+-]\d{4})", '%': "%"} + 'z': "Zone offset", 'f': "Microseconds", 'Z': "Zone name"} + for key in set(timeRE) - set(_patternName): # may not have them all... 
+ _patternName[key] = "%%%s" % key def __init__(self, pattern=None, **kwargs): - DateStrptime.__init__(self) + super(DatePatternRegex, self).__init__() if pattern: self.setPattern(pattern, **kwargs) - def setPattern(self, pattern, anchor=False, **kwargs): - DateStrptime.setPattern(self, pattern.strip()) + def setPattern(self, pattern): + super(DatePatternRegex, self).setPattern(pattern) + super(DatePatternRegex, self).setName( + re.sub(self._patternRE, r'%(\1)s', pattern) % self._patternName) + super(DatePatternRegex, self).setRegex( + re.sub(self._patternRE, r'%(\1)s', pattern) % timeRE) - name = re.sub(self._patternRE, r'%(\1)s', pattern) % self._patternName - DateStrptime.setName(self, name) - - # Custom escape as don't want to escape "%" - pattern = re.sub(self._reEscape, r'\\\1', pattern) - regex = re.sub( - self._patternRE, r'%(\1)s', pattern) % self._patternRegex - if anchor: - regex = r"^" + regex - DateStrptime.setRegex(self, regex, **kwargs) + def getDate(self, line): + dateMatch = self.matchDate(line) + if dateMatch: + return reGroupDictStrptime(dateMatch.groupdict()), dateMatch def setRegex(self, line): raise NotImplementedError("Regex derived from pattern") diff --git a/fail2ban/server/filter.py b/fail2ban/server/filter.py index b4d214d1..8c3ad549 100644 --- a/fail2ban/server/filter.py +++ b/fail2ban/server/filter.py @@ -213,10 +213,7 @@ class Filter(JailThread): template.setName("TAI64N") else: template = DatePatternRegex() - if pattern[0] == "^": # Special extra to enable anchor - template.setPattern(pattern[1:], anchor=True) - else: - template.setPattern(pattern, anchor=False) + template.setPattern(pattern) self.dateDetector = DateDetector() self.dateDetector.appendTemplate(template) logSys.info("Date pattern set to `%r`: `%s`" % @@ -237,8 +234,6 @@ class Filter(JailThread): elif len(templates) == 1: if hasattr(templates[0], "getPattern"): pattern = templates[0].getPattern() - if templates[0].getRegex()[0] == "^": - pattern = "^" + pattern else: 
pattern = None return pattern, templates[0].getName() diff --git a/fail2ban/server/strptime.py b/fail2ban/server/strptime.py new file mode 100644 index 00000000..dac2fef0 --- /dev/null +++ b/fail2ban/server/strptime.py @@ -0,0 +1,193 @@ +# emacs: -*- mode: python; coding: utf-8; py-indent-offset: 4; indent-tabs-mode: t -*- +# vi: set ft=python sts=4 ts=4 sw=4 noet : + +# This file is part of Fail2Ban. +# +# Fail2Ban is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Fail2Ban is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Fail2Ban; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import time +import calendar +import datetime +from _strptime import LocaleTime, TimeRE, _calc_julian_from_U_or_W + +from .mytime import MyTime + +locale_time = LocaleTime() +timeRE = TimeRE() +if 'z' not in timeRE: # python2.6 not present + timeRE['z'] = r"(?P<z>[+-]\d{2}[-5]\d)" + +def reGroupDictStrptime(found_dict): + """This is tweaked from python built-in _strptime""" + + now = MyTime.now() + year = month = day = hour = minute = None + hour = minute = None + second = fraction = 0 + tz = -1 + tzoffset = None + # Default to -1 to signify that values not known; not critical to have, + # though + week_of_year = -1 + week_of_year_start = -1 + # weekday and julian defaulted to -1 so as to signal need to calculate + # values + weekday = julian = -1 + for group_key in found_dict.keys(): + # Directives not explicitly handled below: + # c, x, X + # handled by making out of other directives + # U, W + # worthless without day of the week + if group_key == 'y': + year = int(found_dict['y']) + # Open Group specification for strptime() states that a %y + #value in the range of [00, 68] is in the century 2000, while + #[69,99] is in the century 1900 + if year <= 68: + year += 2000 + else: + year += 1900 + elif group_key == 'Y': + year = int(found_dict['Y']) + elif group_key == 'm': + month = int(found_dict['m']) + elif group_key == 'B': + month = locale_time.f_month.index(found_dict['B'].lower()) + elif group_key == 'b': + month = locale_time.a_month.index(found_dict['b'].lower()) + elif group_key == 'd': + day = int(found_dict['d']) + elif group_key == 'H': + hour = int(found_dict['H']) + elif group_key == 'I': + hour = int(found_dict['I']) + ampm = found_dict.get('p', '').lower() + # If there was no AM/PM indicator, we'll treat this like AM + if ampm in ('', locale_time.am_pm[0]): + # We're in AM so the hour is correct unless we're + # looking at 12 midnight. 
+ # 12 midnight == 12 AM == hour 0 + if hour == 12: + hour = 0 + elif ampm == locale_time.am_pm[1]: + # We're in PM so we need to add 12 to the hour unless + # we're looking at 12 noon. + # 12 noon == 12 PM == hour 12 + if hour != 12: + hour += 12 + elif group_key == 'M': + minute = int(found_dict['M']) + elif group_key == 'S': + second = int(found_dict['S']) + elif group_key == 'f': + s = found_dict['f'] + # Pad to always return microseconds. + s += "0" * (6 - len(s)) + fraction = int(s) + elif group_key == 'A': + weekday = locale_time.f_weekday.index(found_dict['A'].lower()) + elif group_key == 'a': + weekday = locale_time.a_weekday.index(found_dict['a'].lower()) + elif group_key == 'w': + weekday = int(found_dict['w']) + if weekday == 0: + weekday = 6 + else: + weekday -= 1 + elif group_key == 'j': + julian = int(found_dict['j']) + elif group_key in ('U', 'W'): + week_of_year = int(found_dict[group_key]) + if group_key == 'U': + # U starts week on Sunday. + week_of_year_start = 6 + else: + # W starts week on Monday. + week_of_year_start = 0 + elif group_key == 'z': + z = found_dict['z'] + tzoffset = int(z[1:3]) * 60 + int(z[3:5]) + if z.startswith("-"): + tzoffset = -tzoffset + elif group_key == 'Z': + # Since -1 is default value only need to worry about setting tz if + # it can be something other than -1. 
+ found_zone = found_dict['Z'].lower() + for value, tz_values in enumerate(locale_time.timezone): + if found_zone in tz_values: + # Deal with bad locale setup where timezone names are the + # same and yet time.daylight is true; too ambiguous to + # be able to tell what timezone has daylight savings + if (time.tzname[0] == time.tzname[1] and + time.daylight and found_zone not in ("utc", "gmt")): + break + else: + tz = value + break + + # Fail2Ban will assume it's this year + assume_year = False + if year is None: + year = now.year + assume_year = True + # If we know the week of the year and what day of that week, we can figure + # out the Julian day of the year. + if julian == -1 and week_of_year != -1 and weekday != -1: + week_starts_Mon = True if week_of_year_start == 0 else False + julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, + week_starts_Mon) + # Cannot pre-calculate datetime.datetime() since can change in Julian + # calculation and thus could have different value for the day of the week + # calculation. + if julian != -1 and (month is None or day is None): + datetime_result = datetime.datetime.fromordinal((julian - 1) + datetime.datetime(year, 1, 1).toordinal()) + year = datetime_result.year + month = datetime_result.month + day = datetime_result.day + # Add timezone info + tzname = found_dict.get("Z") + if tzoffset is not None: + gmtoff = tzoffset * 60 + else: + gmtoff = None + + # Fail2Ban assume today + assume_today = False + if month is None and day is None: + month = now.month + day = now.day + assume_today = True + + # Actully create date + date_result = datetime.datetime( + year, month, day, hour, minute, second, fraction) + if gmtoff: + date_result = date_result - datetime.timedelta(seconds=gmtoff) + + if date_result > now and assume_today: + # Rollover at midnight, could mean it's yesterday... + date_result = date_result - datetime.timedelta(days=1) + if date_result > now and assume_year: + # Could be last year? 
+ # also reset month and day as it's not yesterday... + date_result = date_result.replace( + year=year-1, month=month, day=day) + + if gmtoff is not None: + return calendar.timegm(date_result.utctimetuple()) + else: + return time.mktime(date_result.utctimetuple()) + diff --git a/fail2ban/tests/datedetectortestcase.py b/fail2ban/tests/datedetectortestcase.py index 0077446f..79723e31 100644 --- a/fail2ban/tests/datedetectortestcase.py +++ b/fail2ban/tests/datedetectortestcase.py @@ -148,47 +148,6 @@ class DateDetectorTest(unittest.TestCase): self.assertEqual(logTime, mu) self.assertEqual(logMatch.group(), '2012/10/11 02:37:17') - def testDateDetectorTemplateOverlap(self): - patterns = [template.getPattern() - for template in self.__datedetector.getTemplates() - if hasattr(template, "getPattern")] - - year = 2008 # Leap year, 08 for %y can be confused with both %d and %m - def iterDates(year): - for month in xrange(1, 13): - for day in xrange(2, calendar.monthrange(year, month)[1]+1, 9): - for hour in xrange(0, 24, 6): - for minute in xrange(0, 60, 15): - for second in xrange(0, 60, 15): # Far enough? - yield datetime.datetime( - year, month, day, hour, minute, second, 300, Utc()) - - overlapedTemplates = set() - for date in iterDates(year): - for pattern in patterns: - datestr = date.strftime(pattern) - datestr = re.sub(r'%f','300', datestr) # for python 2.5 where there is no %f - datestrs = set([ - datestr, - re.sub(r"(\s)0", r"\1 ", datestr), - re.sub(r"(\s)0", r"\1", datestr)]) - for template in self.__datedetector.getTemplates(): - template.resetHits() - for datestr in datestrs: - if template.matchDate(datestr): # or getDate? 
- template.incHits() - - matchedTemplates = [template - for template in self.__datedetector.getTemplates() - if template.getHits() > 0] - self.assertNotEqual(matchedTemplates, [], "Date %r should match at least one template" % pattern) - if len(matchedTemplates) > 1: - overlapedTemplates.add((pattern, tuple(sorted(template.getName() - for template in matchedTemplates)))) - if overlapedTemplates: - print("WARNING: The following date templates overlap:") - pprint.pprint(overlapedTemplates) - def testDateTemplate(self): t = DateTemplate() t.setRegex('^a{3,5}b?c*$') diff --git a/fail2ban/tests/filtertestcase.py b/fail2ban/tests/filtertestcase.py index 02dfba2f..e7e874b5 100644 --- a/fail2ban/tests/filtertestcase.py +++ b/fail2ban/tests/filtertestcase.py @@ -205,7 +205,7 @@ class BasicFilter(unittest.TestCase): self.filter.setDatePattern("^%Y-%m-%d-%H%M%S.%f %z") self.assertEqual(self.filter.getDatePattern(), ("^%Y-%m-%d-%H%M%S.%f %z", - "Year-Month-Day-24hourMinuteSecond.Microseconds Zone offset")) + "^Year-Month-Day-24hourMinuteSecond.Microseconds Zone offset")) class IgnoreIP(LogCaptureTestCase): diff --git a/man/jail.conf.5 b/man/jail.conf.5 index a2462afb..9b337e48 100644 --- a/man/jail.conf.5 +++ b/man/jail.conf.5 @@ -268,15 +268,11 @@ Similar to actions, filters have an [Init] section which can be overridden in \f specifies the maximum number of lines to buffer to match multi-line regexs. For some log formats this will not required to be changed. Other logs may require to increase this value if a particular log file is frequently written to. .TP \fBdatepattern\fR -specifies a custom date pattern as an alternative to the default date detectors e.g. %Y-%m-%d %H:%M +specifies a custom date pattern/regex as an alternative to the default date detectors e.g. %Y-%m-%d %H:%M(?::%S)?. For a list of valid format directives, see Python library documentation for strptime behaviour. 
.br -The following are acceptable format fields (see strptime(3) for descriptions): -.nf -%% %a %A %b %B %d %H %I %j %m %M %p %S %U %w %W %y %Y -.fi -.br - Also, special values of \fIEpoch\fR (UNIX Timestamp), \fITAI64N\fR and \fIISO8601\fR can be used. +.br +\fBNOTE:\fR due to config file string substitution, %'s must be escaped by a % in config files. .TP \fBjournalmatch\fR specifies the systemd journal match used to filter the journal entries. See \fBjournalctl(1)\fR and \fBsystemd.journal-fields(7)\fR for matches syntax and more details on special journal fields. This option is only valid for the \fIsystemd\fR backend. From d41f372c6cbcb0bf5b0d527dd8319da25f56bebf Mon Sep 17 00:00:00 2001 From: Steven Hiscocks Date: Sun, 12 Jan 2014 19:09:11 +0000 Subject: [PATCH 2/2] BF: Typo in "z" regex addition for TimeRE --- fail2ban/server/strptime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fail2ban/server/strptime.py b/fail2ban/server/strptime.py index dac2fef0..da04495f 100644 --- a/fail2ban/server/strptime.py +++ b/fail2ban/server/strptime.py @@ -27,7 +27,7 @@ from .mytime import MyTime locale_time = LocaleTime() timeRE = TimeRE() if 'z' not in timeRE: # python2.6 not present - timeRE['z'] = r"(?P<z>[+-]\d{2}[-5]\d)" + timeRE['z'] = r"(?P<z>[+-]\d{2}[0-5]\d)" def reGroupDictStrptime(found_dict): """This is tweaked from python built-in _strptime"""