Merge pull request #581 from kwirk/datetemplate-regroupdict

ENH: Full regex for datepattern, utilising modified Python `_strptime`
pull/592/head
Steven Hiscocks 2014-01-20 14:53:28 -08:00
commit 4aa50684ab
7 changed files with 230 additions and 103 deletions

View File

@ -43,28 +43,24 @@ class DateDetector:
self.__known_names.add(name) self.__known_names.add(name)
self.__templates.append(template) self.__templates.append(template)
def appendTemplate(self, template, **kwargs): def appendTemplate(self, template):
if isinstance(template, str): if isinstance(template, str):
template = DatePatternRegex(template, **kwargs) template = DatePatternRegex(template)
else:
assert not kwargs
DateDetector._appendTemplate(self, template) DateDetector._appendTemplate(self, template)
def addDefaultTemplate(self): def addDefaultTemplate(self):
self.__lock.acquire() self.__lock.acquire()
try: try:
if sys.version_info >= (2, 5): # because of '%.f' # asctime with subsecond: Sun Jan 23 21:59:59.011 2005
# asctime with subsecond: Sun Jan 23 21:59:59.011 2005 self.appendTemplate("%a %b %d %H:%M:%S\.%f %Y")
self.appendTemplate("%a %b %d %H:%M:%S.%f %Y")
# asctime: Sun Jan 23 21:59:59 2005 # asctime: Sun Jan 23 21:59:59 2005
self.appendTemplate("%a %b %d %H:%M:%S %Y") self.appendTemplate("%a %b %d %H:%M:%S %Y")
# asctime without year: Sun Jan 23 21:59:59 # asctime without year: Sun Jan 23 21:59:59
self.appendTemplate("%a %b %d %H:%M:%S") self.appendTemplate("%a %b %d %H:%M:%S")
# standard: Jan 23 21:59:59 # standard: Jan 23 21:59:59
self.appendTemplate("%b %d %H:%M:%S") self.appendTemplate("%b %d %H:%M:%S")
if sys.version_info >= (2, 5): # because of '%.f' # proftpd date: 2005-01-23 21:59:59,333
# proftpd date: 2005-01-23 21:59:59,333 self.appendTemplate("%Y-%m-%d %H:%M:%S,%f")
self.appendTemplate("%Y-%m-%d %H:%M:%S,%f")
# simple date: 2005-01-23 21:59:59 # simple date: 2005-01-23 21:59:59
self.appendTemplate("%Y-%m-%d %H:%M:%S") self.appendTemplate("%Y-%m-%d %H:%M:%S")
# simple date: 2005/01/23 21:59:59 # simple date: 2005/01/23 21:59:59
@ -81,10 +77,9 @@ class DateDetector:
# CPanel 05/20/2008:01:57:39 # CPanel 05/20/2008:01:57:39
self.appendTemplate("%m/%d/%Y:%H:%M:%S") self.appendTemplate("%m/%d/%Y:%H:%M:%S")
# custom for syslog-ng 2006.12.21 06:43:20 # custom for syslog-ng 2006.12.21 06:43:20
self.appendTemplate("%Y.%m.%d %H:%M:%S") self.appendTemplate("%Y\.%m\.%d %H:%M:%S")
if sys.version_info >= (2, 5): # because of '%.f' # named 26-Jul-2007 15:20:52.252
# named 26-Jul-2007 15:20:52.252 self.appendTemplate("%d-%b-%Y %H:%M:%S\.%f")
self.appendTemplate("%d-%b-%Y %H:%M:%S.%f")
# roundcube 26-Jul-2007 15:20:52 +0200 # roundcube 26-Jul-2007 15:20:52 +0200
self.appendTemplate("%d-%b-%Y %H:%M:%S %z") self.appendTemplate("%d-%b-%Y %H:%M:%S %z")
# 26-Jul-2007 15:20:52 # 26-Jul-2007 15:20:52
@ -92,7 +87,7 @@ class DateDetector:
# 17-07-2008 17:23:25 # 17-07-2008 17:23:25
self.appendTemplate("%d-%m-%Y %H:%M:%S") self.appendTemplate("%d-%m-%Y %H:%M:%S")
# 01-27-2012 16:22:44.252 # 01-27-2012 16:22:44.252
self.appendTemplate("%m-%d-%Y %H:%M:%S.%f") self.appendTemplate("%m-%d-%Y %H:%M:%S\.%f")
# TAI64N # TAI64N
template = DateTai64n() template = DateTai64n()
template.setName("TAI64N") template.setName("TAI64N")
@ -106,15 +101,15 @@ class DateDetector:
template.setName("ISO 8601") template.setName("ISO 8601")
self.appendTemplate(template) self.appendTemplate(template)
# Only time information in the log # Only time information in the log
self.appendTemplate("%H:%M:%S", anchor=True) self.appendTemplate("^%H:%M:%S")
# <09/16/08@05:03:30> # <09/16/08@05:03:30>
self.appendTemplate("<%m/%d/%y@%H:%M:%S>", anchor=True) self.appendTemplate("^<%m/%d/%y@%H:%M:%S>")
# MySQL: 130322 11:46:11 # MySQL: 130322 11:46:11
self.appendTemplate("%y%m%d %H:%M:%S", anchor=True) self.appendTemplate("^%y%m%d ?%H:%M:%S")
# Apache Tomcat # Apache Tomcat
self.appendTemplate("%b %d, %Y %I:%M:%S %p") self.appendTemplate("%b %d, %Y %I:%M:%S %p")
# ASSP: Apr-27-13 02:33:06 # ASSP: Apr-27-13 02:33:06
self.appendTemplate("%b-%d-%y %H:%M:%S", anchor=True) self.appendTemplate("^%b-%d-%y %H:%M:%S")
finally: finally:
self.__lock.release() self.__lock.release()

View File

@ -31,11 +31,12 @@ from datetime import timedelta
from .mytime import MyTime from .mytime import MyTime
from . import iso8601 from . import iso8601
from .strptime import reGroupDictStrptime, timeRE
logSys = logging.getLogger(__name__) logSys = logging.getLogger(__name__)
class DateTemplate: class DateTemplate(object):
def __init__(self): def __init__(self):
self.__name = "" self.__name = ""
@ -55,7 +56,7 @@ class DateTemplate:
if (wordBegin and not re.search(r'^\^', regex)): if (wordBegin and not re.search(r'^\^', regex)):
regex = r'\b' + regex regex = r'\b' + regex
self.__regex = regex self.__regex = regex
self.__cRegex = re.compile(regex, re.UNICODE) self.__cRegex = re.compile(regex, re.UNICODE | re.IGNORECASE)
def getRegex(self): def getRegex(self):
return self.__regex return self.__regex
@ -230,44 +231,32 @@ except ValueError:
DateStrptime._z = False DateStrptime._z = False
class DatePatternRegex(DateStrptime): class DatePatternRegex(DateStrptime):
_reEscape = r"([\\.^$*+?\(\){}\[\]|])" _patternRE = r"%%(%%|[%s])" % "".join(timeRE.keys())
_patternRE = r"%(%|[aAbBdfHIjmMpSUwWyYz])"
_patternName = { _patternName = {
'a': "DAY", 'A': "DAYNAME", 'b': "MON", 'B': "MONTH", 'd': "Day", 'a': "DAY", 'A': "DAYNAME", 'b': "MON", 'B': "MONTH", 'd': "Day",
'H': "24hour", 'I': "12hour", 'j': "Yearday", 'm': "Month", 'H': "24hour", 'I': "12hour", 'j': "Yearday", 'm': "Month",
'M': "Minute", 'p': "AMPM", 'S': "Second", 'U': "Yearweek", 'M': "Minute", 'p': "AMPM", 'S': "Second", 'U': "Yearweek",
'w': "Weekday", 'W': "Yearweek", 'y': 'Year2', 'Y': "Year", '%': "%", 'w': "Weekday", 'W': "Yearweek", 'y': 'Year2', 'Y': "Year", '%': "%",
'z': "Zone offset", 'f': "Microseconds" } 'z': "Zone offset", 'f': "Microseconds", 'Z': "Zone name"}
_patternRegex = { for key in set(timeRE) - set(_patternName): # may not have them all...
'a': r"\w{3}", 'A': r"\w+", 'b': r"\w{3}", 'B': r"\w+", _patternName[key] = "%%%s" % key
'd': r"(?:3[0-1]|[1-2]\d|[ 0]?\d)",
'f': r"(?P<_f>\d{1,6})", 'H': r"(?:2[0-3]|1\d|[ 0]?\d)",
'I': r"(?:1[0-2]|[ 0]?\d)",
'j': r"(?:36[0-6]3[0-5]\d|[1-2]\d\d|[ 0]?\d\d|[ 0]{0,2}\d)",
'm': r"(?:1[0-2]|[ 0]?[1-9])", 'M': r"[0-5]\d", 'p': r"[AP]M",
'S': r"(?:6[01]|[0-5]\d)", 'U': r"(?:5[0-3]|[1-4]\d|[ 0]?\d)",
'w': r"[0-6]", 'W': r"(?:5[0-3]|[ 0]?\d)", 'y': r"\d{2}",
'Y': r"\d{4}",
'z': r"(?P<_z>[+-]\d{4})", '%': "%"}
def __init__(self, pattern=None, **kwargs): def __init__(self, pattern=None, **kwargs):
DateStrptime.__init__(self) super(DatePatternRegex, self).__init__()
if pattern: if pattern:
self.setPattern(pattern, **kwargs) self.setPattern(pattern, **kwargs)
def setPattern(self, pattern, anchor=False, **kwargs): def setPattern(self, pattern):
DateStrptime.setPattern(self, pattern.strip()) super(DatePatternRegex, self).setPattern(pattern)
super(DatePatternRegex, self).setName(
re.sub(self._patternRE, r'%(\1)s', pattern) % self._patternName)
super(DatePatternRegex, self).setRegex(
re.sub(self._patternRE, r'%(\1)s', pattern) % timeRE)
name = re.sub(self._patternRE, r'%(\1)s', pattern) % self._patternName def getDate(self, line):
DateStrptime.setName(self, name) dateMatch = self.matchDate(line)
if dateMatch:
# Custom escape as don't want to escape "%" return reGroupDictStrptime(dateMatch.groupdict()), dateMatch
pattern = re.sub(self._reEscape, r'\\\1', pattern)
regex = re.sub(
self._patternRE, r'%(\1)s', pattern) % self._patternRegex
if anchor:
regex = r"^" + regex
DateStrptime.setRegex(self, regex, **kwargs)
def setRegex(self, line): def setRegex(self, line):
raise NotImplementedError("Regex derived from pattern") raise NotImplementedError("Regex derived from pattern")

View File

@ -213,10 +213,7 @@ class Filter(JailThread):
template.setName("TAI64N") template.setName("TAI64N")
else: else:
template = DatePatternRegex() template = DatePatternRegex()
if pattern[0] == "^": # Special extra to enable anchor template.setPattern(pattern)
template.setPattern(pattern[1:], anchor=True)
else:
template.setPattern(pattern, anchor=False)
self.dateDetector = DateDetector() self.dateDetector = DateDetector()
self.dateDetector.appendTemplate(template) self.dateDetector.appendTemplate(template)
logSys.info("Date pattern set to `%r`: `%s`" % logSys.info("Date pattern set to `%r`: `%s`" %
@ -237,8 +234,6 @@ class Filter(JailThread):
elif len(templates) == 1: elif len(templates) == 1:
if hasattr(templates[0], "getPattern"): if hasattr(templates[0], "getPattern"):
pattern = templates[0].getPattern() pattern = templates[0].getPattern()
if templates[0].getRegex()[0] == "^":
pattern = "^" + pattern
else: else:
pattern = None pattern = None
return pattern, templates[0].getName() return pattern, templates[0].getName()

193
fail2ban/server/strptime.py Normal file
View File

@ -0,0 +1,193 @@
# emacs: -*- mode: python; coding: utf-8; py-indent-offset: 4; indent-tabs-mode: t -*-
# vi: set ft=python sts=4 ts=4 sw=4 noet :
# This file is part of Fail2Ban.
#
# Fail2Ban is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Fail2Ban is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Fail2Ban; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import time
import calendar
import datetime
from _strptime import LocaleTime, TimeRE, _calc_julian_from_U_or_W
from .mytime import MyTime
# Locale-dependent month/weekday/AM-PM names, as used by Python's own strptime
locale_time = LocaleTime()

# Mapping of strptime directive letter -> regex with a named group
timeRE = TimeRE()
# python2.6's TimeRE has no entry for '%z'; supply one only when it is absent
timeRE.setdefault('z', r"(?P<z>[+-]\d{2}[0-5]\d)")
def reGroupDictStrptime(found_dict):
	"""Return the epoch timestamp described by matched strptime groups.

	This is tweaked from python built-in _strptime.

	`found_dict` maps strptime directive letters ('Y', 'm', 'd', 'H', 'M',
	'S', 'f', 'z', ...) to the text matched for them, e.g. the result of
	`match.groupdict()`. Entries whose value is None (optional groups which
	did not take part in the match) are ignored.

	Missing information is filled in as follows:
	  - no year: the current year is assumed, or the previous year if that
	    would place the date in the future;
	  - no month and no day: today is assumed, or yesterday if that would
	    place the date in the future;
	  - no hour, minute, second or microsecond: 0 is used.

	Returns seconds since the epoch, computed with calendar.timegm() when a
	'%z' offset was matched and with time.mktime() otherwise.

	ValueError from list.index() or datetime.datetime() propagates when a
	month/weekday name is unknown or the values do not form a valid date.
	"""
	now = MyTime.now()
	year = month = day = None
	# Time of day defaults to midnight so that date-only patterns still
	# produce a valid datetime instead of passing None to datetime()
	hour = minute = second = fraction = 0
	tzoffset = None
	# Default to -1 to signify that values not known; not critical to have,
	# though
	week_of_year = -1
	week_of_year_start = -1
	# weekday and julian defaulted to -1 so as to signal need to calculate
	# values
	weekday = julian = -1
	for group_key, value in found_dict.items():
		# Directives not explicitly handled below:
		#   c, x, X
		#      handled by making out of other directives
		#   U, W
		#      worthless without day of the week
		#   Z
		#      zone name is matched but not used for the conversion
		if value is None:
			# Optional group which did not participate in the match
			continue
		if group_key == 'y':
			year = int(value)
			# Open Group specification for strptime() states that a %y
			# value in the range of [00, 68] is in the century 2000, while
			# [69,99] is in the century 1900
			if year <= 68:
				year += 2000
			else:
				year += 1900
		elif group_key == 'Y':
			year = int(value)
		elif group_key == 'm':
			month = int(value)
		elif group_key == 'B':
			month = locale_time.f_month.index(value.lower())
		elif group_key == 'b':
			month = locale_time.a_month.index(value.lower())
		elif group_key == 'd':
			day = int(value)
		elif group_key == 'H':
			hour = int(value)
		elif group_key == 'I':
			hour = int(value)
			# 'p' may be absent, or present but unmatched (None)
			ampm = (found_dict.get('p') or '').lower()
			# If there was no AM/PM indicator, we'll treat this like AM
			if ampm in ('', locale_time.am_pm[0]):
				# We're in AM so the hour is correct unless we're
				# looking at 12 midnight.
				# 12 midnight == 12 AM == hour 0
				if hour == 12:
					hour = 0
			elif ampm == locale_time.am_pm[1]:
				# We're in PM so we need to add 12 to the hour unless
				# we're looking at 12 noon.
				# 12 noon == 12 PM == hour 12
				if hour != 12:
					hour += 12
		elif group_key == 'M':
			minute = int(value)
		elif group_key == 'S':
			second = int(value)
		elif group_key == 'f':
			# Pad to always return microseconds.
			fraction = int(value + "0" * (6 - len(value)))
		elif group_key == 'A':
			weekday = locale_time.f_weekday.index(value.lower())
		elif group_key == 'a':
			weekday = locale_time.a_weekday.index(value.lower())
		elif group_key == 'w':
			# %w counts from Sunday=0; convert to Monday=0
			weekday = int(value)
			if weekday == 0:
				weekday = 6
			else:
				weekday -= 1
		elif group_key == 'j':
			julian = int(value)
		elif group_key in ('U', 'W'):
			week_of_year = int(value)
			if group_key == 'U':
				# U starts week on Sunday.
				week_of_year_start = 6
			else:
				# W starts week on Monday.
				week_of_year_start = 0
		elif group_key == 'z':
			# Offset in minutes east of UTC, from "+HHMM" / "-HHMM"
			tzoffset = int(value[1:3]) * 60 + int(value[3:5])
			if value.startswith("-"):
				tzoffset = -tzoffset

	# Fail2Ban will assume it's this year
	assume_year = False
	if year is None:
		year = now.year
		assume_year = True
	# If we know the week of the year and what day of that week, we can figure
	# out the Julian day of the year.
	if julian == -1 and week_of_year != -1 and weekday != -1:
		week_starts_Mon = week_of_year_start == 0
		julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
											week_starts_Mon)
	# Cannot pre-calculate datetime.datetime() since can change in Julian
	# calculation and thus could have different value for the day of the week
	# calculation.
	if julian != -1 and (month is None or day is None):
		datetime_result = datetime.datetime.fromordinal(
			(julian - 1) + datetime.datetime(year, 1, 1).toordinal())
		year = datetime_result.year
		month = datetime_result.month
		day = datetime_result.day

	# Fail2Ban assume today
	assume_today = False
	if month is None and day is None:
		month = now.month
		day = now.day
		assume_today = True

	# Actually create date
	date_result = datetime.datetime(
		year, month, day, hour, minute, second, fraction)
	if tzoffset is not None:
		# Shift the wall-clock time to UTC
		date_result = date_result - datetime.timedelta(minutes=tzoffset)

	# NOTE(review): when a zone offset was matched, date_result is UTC here
	# while `now` comes from MyTime.now() -- confirm both use the same time
	# base before relying on the rollover checks below.
	if date_result > now and assume_today:
		# Rollover at midnight, could mean it's yesterday...
		date_result = date_result - datetime.timedelta(days=1)
	if date_result > now and assume_year:
		# Could be last year?
		# also reset month and day as it's not yesterday...
		date_result = date_result.replace(
			year=year-1, month=month, day=day)

	if tzoffset is not None:
		return calendar.timegm(date_result.utctimetuple())
	# timetuple() leaves tm_isdst at -1 so mktime() applies DST itself;
	# utctimetuple() would force tm_isdst=0 and be an hour off during DST
	return time.mktime(date_result.timetuple())

View File

@ -148,47 +148,6 @@ class DateDetectorTest(unittest.TestCase):
self.assertEqual(logTime, mu) self.assertEqual(logTime, mu)
self.assertEqual(logMatch.group(), '2012/10/11 02:37:17') self.assertEqual(logMatch.group(), '2012/10/11 02:37:17')
def testDateDetectorTemplateOverlap(self):
patterns = [template.getPattern()
for template in self.__datedetector.getTemplates()
if hasattr(template, "getPattern")]
year = 2008 # Leap year, 08 for %y can be confused with both %d and %m
def iterDates(year):
for month in xrange(1, 13):
for day in xrange(2, calendar.monthrange(year, month)[1]+1, 9):
for hour in xrange(0, 24, 6):
for minute in xrange(0, 60, 15):
for second in xrange(0, 60, 15): # Far enough?
yield datetime.datetime(
year, month, day, hour, minute, second, 300, Utc())
overlapedTemplates = set()
for date in iterDates(year):
for pattern in patterns:
datestr = date.strftime(pattern)
datestr = re.sub(r'%f','300', datestr) # for python 2.5 where there is no %f
datestrs = set([
datestr,
re.sub(r"(\s)0", r"\1 ", datestr),
re.sub(r"(\s)0", r"\1", datestr)])
for template in self.__datedetector.getTemplates():
template.resetHits()
for datestr in datestrs:
if template.matchDate(datestr): # or getDate?
template.incHits()
matchedTemplates = [template
for template in self.__datedetector.getTemplates()
if template.getHits() > 0]
self.assertNotEqual(matchedTemplates, [], "Date %r should match at least one template" % pattern)
if len(matchedTemplates) > 1:
overlapedTemplates.add((pattern, tuple(sorted(template.getName()
for template in matchedTemplates))))
if overlapedTemplates:
print("WARNING: The following date templates overlap:")
pprint.pprint(overlapedTemplates)
def testDateTemplate(self): def testDateTemplate(self):
t = DateTemplate() t = DateTemplate()
t.setRegex('^a{3,5}b?c*$') t.setRegex('^a{3,5}b?c*$')

View File

@ -205,7 +205,7 @@ class BasicFilter(unittest.TestCase):
self.filter.setDatePattern("^%Y-%m-%d-%H%M%S.%f %z") self.filter.setDatePattern("^%Y-%m-%d-%H%M%S.%f %z")
self.assertEqual(self.filter.getDatePattern(), self.assertEqual(self.filter.getDatePattern(),
("^%Y-%m-%d-%H%M%S.%f %z", ("^%Y-%m-%d-%H%M%S.%f %z",
"Year-Month-Day-24hourMinuteSecond.Microseconds Zone offset")) "^Year-Month-Day-24hourMinuteSecond.Microseconds Zone offset"))
class IgnoreIP(LogCaptureTestCase): class IgnoreIP(LogCaptureTestCase):

View File

@ -291,15 +291,11 @@ Similar to actions, filters have an [Init] section which can be overridden in \f
specifies the maximum number of lines to buffer to match multi-line regexs. For some log formats this will not need to be changed. Other logs may require this value to be increased if a particular log file is frequently written to. specifies the maximum number of lines to buffer to match multi-line regexs. For some log formats this will not need to be changed. Other logs may require this value to be increased if a particular log file is frequently written to.
.TP .TP
\fBdatepattern\fR \fBdatepattern\fR
specifies a custom date pattern as an alternative to the default date detectors e.g. %Y-%m-%d %H:%M specifies a custom date pattern/regex as an alternative to the default date detectors e.g. %Y-%m-%d %H:%M(?::%S)?. For a list of valid format directives, see Python library documentation for strptime behaviour.
.br .br
The following are acceptable format fields (see strptime(3) for descriptions):
.nf
%% %a %A %b %B %d %H %I %j %m %M %p %S %U %w %W %y %Y
.fi
.br
Also, special values of \fIEpoch\fR (UNIX Timestamp), \fITAI64N\fR and \fIISO8601\fR can be used. Also, special values of \fIEpoch\fR (UNIX Timestamp), \fITAI64N\fR and \fIISO8601\fR can be used.
.br
\fBNOTE:\fR due to config file string substitution, every % must be escaped with another % (i.e. written as %%) in config files.
.TP .TP
\fBjournalmatch\fR \fBjournalmatch\fR
specifies the systemd journal match used to filter the journal entries. See \fBjournalctl(1)\fR and \fBsystemd.journal-fields(7)\fR for matches syntax and more details on special journal fields. This option is only valid for the \fIsystemd\fR backend. specifies the systemd journal match used to filter the journal entries. See \fBjournalctl(1)\fR and \fBsystemd.journal-fields(7)\fR for matches syntax and more details on special journal fields. This option is only valid for the \fIsystemd\fR backend.