Merge pull request #581 from kwirk/datetemplate-regroupdict

ENH: Full regex for datepattern, utilising modified Python `_strptime`
pull/592/head
Steven Hiscocks 2014-01-20 14:53:28 -08:00
commit 4aa50684ab
7 changed files with 230 additions and 103 deletions

View File

@ -43,28 +43,24 @@ class DateDetector:
self.__known_names.add(name)
self.__templates.append(template)
def appendTemplate(self, template, **kwargs):
def appendTemplate(self, template):
if isinstance(template, str):
template = DatePatternRegex(template, **kwargs)
else:
assert not kwargs
template = DatePatternRegex(template)
DateDetector._appendTemplate(self, template)
def addDefaultTemplate(self):
self.__lock.acquire()
try:
if sys.version_info >= (2, 5): # because of '%.f'
# asctime with subsecond: Sun Jan 23 21:59:59.011 2005
self.appendTemplate("%a %b %d %H:%M:%S.%f %Y")
# asctime with subsecond: Sun Jan 23 21:59:59.011 2005
self.appendTemplate("%a %b %d %H:%M:%S\.%f %Y")
# asctime: Sun Jan 23 21:59:59 2005
self.appendTemplate("%a %b %d %H:%M:%S %Y")
# asctime without year: Sun Jan 23 21:59:59
self.appendTemplate("%a %b %d %H:%M:%S")
# standard: Jan 23 21:59:59
self.appendTemplate("%b %d %H:%M:%S")
if sys.version_info >= (2, 5): # because of '%.f'
# proftpd date: 2005-01-23 21:59:59,333
self.appendTemplate("%Y-%m-%d %H:%M:%S,%f")
# proftpd date: 2005-01-23 21:59:59,333
self.appendTemplate("%Y-%m-%d %H:%M:%S,%f")
# simple date: 2005-01-23 21:59:59
self.appendTemplate("%Y-%m-%d %H:%M:%S")
# simple date: 2005/01/23 21:59:59
@ -81,10 +77,9 @@ class DateDetector:
# CPanel 05/20/2008:01:57:39
self.appendTemplate("%m/%d/%Y:%H:%M:%S")
# custom for syslog-ng 2006.12.21 06:43:20
self.appendTemplate("%Y.%m.%d %H:%M:%S")
if sys.version_info >= (2, 5): # because of '%.f'
# named 26-Jul-2007 15:20:52.252
self.appendTemplate("%d-%b-%Y %H:%M:%S.%f")
self.appendTemplate("%Y\.%m\.%d %H:%M:%S")
# named 26-Jul-2007 15:20:52.252
self.appendTemplate("%d-%b-%Y %H:%M:%S\.%f")
# roundcube 26-Jul-2007 15:20:52 +0200
self.appendTemplate("%d-%b-%Y %H:%M:%S %z")
# 26-Jul-2007 15:20:52
@ -92,7 +87,7 @@ class DateDetector:
# 17-07-2008 17:23:25
self.appendTemplate("%d-%m-%Y %H:%M:%S")
# 01-27-2012 16:22:44.252
self.appendTemplate("%m-%d-%Y %H:%M:%S.%f")
self.appendTemplate("%m-%d-%Y %H:%M:%S\.%f")
# TAI64N
template = DateTai64n()
template.setName("TAI64N")
@ -106,15 +101,15 @@ class DateDetector:
template.setName("ISO 8601")
self.appendTemplate(template)
# Only time information in the log
self.appendTemplate("%H:%M:%S", anchor=True)
self.appendTemplate("^%H:%M:%S")
# <09/16/08@05:03:30>
self.appendTemplate("<%m/%d/%y@%H:%M:%S>", anchor=True)
self.appendTemplate("^<%m/%d/%y@%H:%M:%S>")
# MySQL: 130322 11:46:11
self.appendTemplate("%y%m%d %H:%M:%S", anchor=True)
self.appendTemplate("^%y%m%d ?%H:%M:%S")
# Apache Tomcat
self.appendTemplate("%b %d, %Y %I:%M:%S %p")
# ASSP: Apr-27-13 02:33:06
self.appendTemplate("%b-%d-%y %H:%M:%S", anchor=True)
self.appendTemplate("^%b-%d-%y %H:%M:%S")
finally:
self.__lock.release()

View File

@ -31,11 +31,12 @@ from datetime import timedelta
from .mytime import MyTime
from . import iso8601
from .strptime import reGroupDictStrptime, timeRE
logSys = logging.getLogger(__name__)
class DateTemplate:
class DateTemplate(object):
def __init__(self):
self.__name = ""
@ -55,7 +56,7 @@ class DateTemplate:
if (wordBegin and not re.search(r'^\^', regex)):
regex = r'\b' + regex
self.__regex = regex
self.__cRegex = re.compile(regex, re.UNICODE)
self.__cRegex = re.compile(regex, re.UNICODE | re.IGNORECASE)
def getRegex(self):
return self.__regex
@ -230,44 +231,32 @@ except ValueError:
DateStrptime._z = False
class DatePatternRegex(DateStrptime):
_reEscape = r"([\\.^$*+?\(\){}\[\]|])"
_patternRE = r"%(%|[aAbBdfHIjmMpSUwWyYz])"
_patternRE = r"%%(%%|[%s])" % "".join(timeRE.keys())
_patternName = {
'a': "DAY", 'A': "DAYNAME", 'b': "MON", 'B': "MONTH", 'd': "Day",
'H': "24hour", 'I': "12hour", 'j': "Yearday", 'm': "Month",
'M': "Minute", 'p': "AMPM", 'S': "Second", 'U': "Yearweek",
'w': "Weekday", 'W': "Yearweek", 'y': 'Year2', 'Y': "Year", '%': "%",
'z': "Zone offset", 'f': "Microseconds" }
_patternRegex = {
'a': r"\w{3}", 'A': r"\w+", 'b': r"\w{3}", 'B': r"\w+",
'd': r"(?:3[0-1]|[1-2]\d|[ 0]?\d)",
'f': r"(?P<_f>\d{1,6})", 'H': r"(?:2[0-3]|1\d|[ 0]?\d)",
'I': r"(?:1[0-2]|[ 0]?\d)",
'j': r"(?:36[0-6]3[0-5]\d|[1-2]\d\d|[ 0]?\d\d|[ 0]{0,2}\d)",
'm': r"(?:1[0-2]|[ 0]?[1-9])", 'M': r"[0-5]\d", 'p': r"[AP]M",
'S': r"(?:6[01]|[0-5]\d)", 'U': r"(?:5[0-3]|[1-4]\d|[ 0]?\d)",
'w': r"[0-6]", 'W': r"(?:5[0-3]|[ 0]?\d)", 'y': r"\d{2}",
'Y': r"\d{4}",
'z': r"(?P<_z>[+-]\d{4})", '%': "%"}
'z': "Zone offset", 'f': "Microseconds", 'Z': "Zone name"}
for key in set(timeRE) - set(_patternName): # may not have them all...
_patternName[key] = "%%%s" % key
def __init__(self, pattern=None, **kwargs):
DateStrptime.__init__(self)
super(DatePatternRegex, self).__init__()
if pattern:
self.setPattern(pattern, **kwargs)
def setPattern(self, pattern, anchor=False, **kwargs):
DateStrptime.setPattern(self, pattern.strip())
def setPattern(self, pattern):
super(DatePatternRegex, self).setPattern(pattern)
super(DatePatternRegex, self).setName(
re.sub(self._patternRE, r'%(\1)s', pattern) % self._patternName)
super(DatePatternRegex, self).setRegex(
re.sub(self._patternRE, r'%(\1)s', pattern) % timeRE)
name = re.sub(self._patternRE, r'%(\1)s', pattern) % self._patternName
DateStrptime.setName(self, name)
# Custom escape as don't want to escape "%"
pattern = re.sub(self._reEscape, r'\\\1', pattern)
regex = re.sub(
self._patternRE, r'%(\1)s', pattern) % self._patternRegex
if anchor:
regex = r"^" + regex
DateStrptime.setRegex(self, regex, **kwargs)
def getDate(self, line):
dateMatch = self.matchDate(line)
if dateMatch:
return reGroupDictStrptime(dateMatch.groupdict()), dateMatch
def setRegex(self, line):
raise NotImplementedError("Regex derived from pattern")

View File

@ -213,10 +213,7 @@ class Filter(JailThread):
template.setName("TAI64N")
else:
template = DatePatternRegex()
if pattern[0] == "^": # Special extra to enable anchor
template.setPattern(pattern[1:], anchor=True)
else:
template.setPattern(pattern, anchor=False)
template.setPattern(pattern)
self.dateDetector = DateDetector()
self.dateDetector.appendTemplate(template)
logSys.info("Date pattern set to `%r`: `%s`" %
@ -237,8 +234,6 @@ class Filter(JailThread):
elif len(templates) == 1:
if hasattr(templates[0], "getPattern"):
pattern = templates[0].getPattern()
if templates[0].getRegex()[0] == "^":
pattern = "^" + pattern
else:
pattern = None
return pattern, templates[0].getName()

193
fail2ban/server/strptime.py Normal file
View File

@ -0,0 +1,193 @@
# emacs: -*- mode: python; coding: utf-8; py-indent-offset: 4; indent-tabs-mode: t -*-
# vi: set ft=python sts=4 ts=4 sw=4 noet :
# This file is part of Fail2Ban.
#
# Fail2Ban is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Fail2Ban is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Fail2Ban; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import time
import calendar
import datetime
from _strptime import LocaleTime, TimeRE, _calc_julian_from_U_or_W
from .mytime import MyTime
# Locale-specific names (months, weekdays, AM/PM, timezones) used by
# reGroupDictStrptime() to decode the text matched for %b, %B, %a, %A, %p, %Z.
locale_time = LocaleTime()
# Mapping of strptime directive letters to regex fragments.
timeRE = TimeRE()
# Provide a fallback regex for the %z (numeric UTC offset) directive when the
# running Python's TimeRE does not define it.
if 'z' not in timeRE: # python2.6 not present
	timeRE['z'] = r"(?P<z>[+-]\d{2}[0-5]\d)"
def reGroupDictStrptime(found_dict):
	"""Convert matched strptime directive groups into a Unix timestamp.

	This is tweaked from python built-in _strptime.

	Parameters
	----------
	found_dict : dict
		Maps a directive letter (e.g. 'Y', 'm', 'd', 'H', 'M', 'S', 'z') to
		the text matched for it, typically ``match.groupdict()``. Entries
		whose value is None (optional groups which did not take part in the
		match) are ignored.

	Returns
	-------
	number
		Seconds since the epoch. When a ``%z`` offset was matched the date is
		converted to UTC and passed to `calendar.timegm`; otherwise it is
		passed to `time.mktime`.

	Notes
	-----
	A missing year is assumed to be the current year, and a missing month
	and day are assumed to be today; if the result would then lie in the
	future it is moved back by one day or one year respectively.
	"""

	now = MyTime.now()
	year = month = day = None
	# Absent time fields default to 0 (as in the stdlib _strptime), so that
	# date-only patterns do not hand None to datetime.datetime().
	hour = minute = second = fraction = 0
	# NOTE: tz is computed for parity with the stdlib implementation, but it
	# is not used when building the returned timestamp.
	tz = -1
	tzoffset = None
	# Default to -1 to signify that values not known; not critical to have,
	# though
	week_of_year = -1
	week_of_year_start = -1
	# weekday and julian defaulted to -1 so as to signal need to calculate
	# values
	weekday = julian = -1
	for group_key, value in found_dict.items():
		# Optional groups which did not participate in the match are None.
		if value is None:
			continue
		# Directives not explicitly handled below:
		#   c, x, X
		#      handled by making out of other directives
		#   U, W
		#      worthless without day of the week
		if group_key == 'y':
			year = int(value)
			# Open Group specification for strptime() states that a %y
			# value in the range of [00, 68] is in the century 2000, while
			# [69,99] is in the century 1900
			if year <= 68:
				year += 2000
			else:
				year += 1900
		elif group_key == 'Y':
			year = int(value)
		elif group_key == 'm':
			month = int(value)
		elif group_key == 'B':
			month = locale_time.f_month.index(value.lower())
		elif group_key == 'b':
			month = locale_time.a_month.index(value.lower())
		elif group_key == 'd':
			day = int(value)
		elif group_key == 'H':
			hour = int(value)
		elif group_key == 'I':
			hour = int(value)
			# A missing or unmatched AM/PM indicator is treated like AM
			ampm = (found_dict.get('p') or '').lower()
			if ampm in ('', locale_time.am_pm[0]):
				# We're in AM so the hour is correct unless we're
				# looking at 12 midnight.
				# 12 midnight == 12 AM == hour 0
				if hour == 12:
					hour = 0
			elif ampm == locale_time.am_pm[1]:
				# We're in PM so we need to add 12 to the hour unless
				# we're looking at 12 noon.
				# 12 noon == 12 PM == hour 12
				if hour != 12:
					hour += 12
		elif group_key == 'M':
			minute = int(value)
		elif group_key == 'S':
			second = int(value)
		elif group_key == 'f':
			# Pad to always return microseconds.
			fraction = int(value + "0" * (6 - len(value)))
		elif group_key == 'A':
			weekday = locale_time.f_weekday.index(value.lower())
		elif group_key == 'a':
			weekday = locale_time.a_weekday.index(value.lower())
		elif group_key == 'w':
			# %w counts from Sunday == 0; convert to Monday == 0
			weekday = int(value)
			if weekday == 0:
				weekday = 6
			else:
				weekday -= 1
		elif group_key == 'j':
			julian = int(value)
		elif group_key in ('U', 'W'):
			week_of_year = int(value)
			if group_key == 'U':
				# U starts week on Sunday.
				week_of_year_start = 6
			else:
				# W starts week on Monday.
				week_of_year_start = 0
		elif group_key == 'z':
			# "+HHMM" / "-HHMM" -> signed offset in minutes
			tzoffset = int(value[1:3]) * 60 + int(value[3:5])
			if value.startswith("-"):
				tzoffset = -tzoffset
		elif group_key == 'Z':
			# Since -1 is default value only need to worry about setting tz if
			# it can be something other than -1.
			found_zone = value.lower()
			for tz_index, tz_values in enumerate(locale_time.timezone):
				if found_zone in tz_values:
					# Deal with bad locale setup where timezone names are the
					# same and yet time.daylight is true; too ambiguous to
					# be able to tell what timezone has daylight savings
					if (time.tzname[0] == time.tzname[1] and
							time.daylight and
							found_zone not in ("utc", "gmt")):
						break
					else:
						tz = tz_index
						break

	# Fail2Ban will assume it's this year
	assume_year = False
	if year is None:
		year = now.year
		assume_year = True
	# If we know the week of the year and what day of that week, we can figure
	# out the Julian day of the year.
	if julian == -1 and week_of_year != -1 and weekday != -1:
		week_starts_Mon = (week_of_year_start == 0)
		julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
			week_starts_Mon)
	# Cannot pre-calculate datetime.datetime() since can change in Julian
	# calculation and thus could have different value for the day of the week
	# calculation.
	if julian != -1 and (month is None or day is None):
		datetime_result = datetime.datetime.fromordinal(
			(julian - 1) + datetime.datetime(year, 1, 1).toordinal())
		year = datetime_result.year
		month = datetime_result.month
		day = datetime_result.day
	# Timezone offset in seconds, if one was matched
	if tzoffset is not None:
		gmtoff = tzoffset * 60
	else:
		gmtoff = None

	# Fail2Ban assume today
	assume_today = False
	if month is None and day is None:
		month = now.month
		day = now.day
		assume_today = True

	# Actually create date
	date_result = datetime.datetime(
		year, month, day, hour, minute, second, fraction)
	if gmtoff:
		# Shift the matched wall-clock time to UTC
		date_result = date_result - datetime.timedelta(seconds=gmtoff)

	# NOTE(review): after an offset was applied date_result is in UTC, while
	# `now` is presumably local time -- confirm the comparisons below.
	if date_result > now and assume_today:
		# Rollover at midnight, could mean it's yesterday...
		date_result = date_result - datetime.timedelta(days=1)
	if date_result > now and assume_year:
		# Could be last year?
		# also reset month and day as it's not yesterday...
		date_result = date_result.replace(
			year=year-1, month=month, day=day)

	if gmtoff is not None:
		return calendar.timegm(date_result.utctimetuple())
	else:
		# timetuple() leaves tm_isdst at -1 for a naive datetime so mktime()
		# resolves DST itself; utctimetuple() would force tm_isdst=0 and
		# shift local times by an hour while DST is in effect.
		return time.mktime(date_result.timetuple())

View File

@ -148,47 +148,6 @@ class DateDetectorTest(unittest.TestCase):
self.assertEqual(logTime, mu)
self.assertEqual(logMatch.group(), '2012/10/11 02:37:17')
def testDateDetectorTemplateOverlap(self):
patterns = [template.getPattern()
for template in self.__datedetector.getTemplates()
if hasattr(template, "getPattern")]
year = 2008 # Leap year, 08 for %y can be confused with both %d and %m
def iterDates(year):
for month in xrange(1, 13):
for day in xrange(2, calendar.monthrange(year, month)[1]+1, 9):
for hour in xrange(0, 24, 6):
for minute in xrange(0, 60, 15):
for second in xrange(0, 60, 15): # Far enough?
yield datetime.datetime(
year, month, day, hour, minute, second, 300, Utc())
overlapedTemplates = set()
for date in iterDates(year):
for pattern in patterns:
datestr = date.strftime(pattern)
datestr = re.sub(r'%f','300', datestr) # for python 2.5 where there is no %f
datestrs = set([
datestr,
re.sub(r"(\s)0", r"\1 ", datestr),
re.sub(r"(\s)0", r"\1", datestr)])
for template in self.__datedetector.getTemplates():
template.resetHits()
for datestr in datestrs:
if template.matchDate(datestr): # or getDate?
template.incHits()
matchedTemplates = [template
for template in self.__datedetector.getTemplates()
if template.getHits() > 0]
self.assertNotEqual(matchedTemplates, [], "Date %r should match at least one template" % pattern)
if len(matchedTemplates) > 1:
overlapedTemplates.add((pattern, tuple(sorted(template.getName()
for template in matchedTemplates))))
if overlapedTemplates:
print("WARNING: The following date templates overlap:")
pprint.pprint(overlapedTemplates)
def testDateTemplate(self):
t = DateTemplate()
t.setRegex('^a{3,5}b?c*$')

View File

@ -205,7 +205,7 @@ class BasicFilter(unittest.TestCase):
self.filter.setDatePattern("^%Y-%m-%d-%H%M%S.%f %z")
self.assertEqual(self.filter.getDatePattern(),
("^%Y-%m-%d-%H%M%S.%f %z",
"Year-Month-Day-24hourMinuteSecond.Microseconds Zone offset"))
"^Year-Month-Day-24hourMinuteSecond.Microseconds Zone offset"))
class IgnoreIP(LogCaptureTestCase):

View File

@ -291,15 +291,11 @@ Similar to actions, filters have an [Init] section which can be overridden in \f
specifies the maximum number of lines to buffer to match multi-line regexs. For some log formats this will not need to be changed. Other logs may require this value to be increased if a particular log file is frequently written to.
.TP
\fBdatepattern\fR
specifies a custom date pattern as an alternative to the default date detectors e.g. %Y-%m-%d %H:%M
specifies a custom date pattern/regex as an alternative to the default date detectors e.g. %Y-%m-%d %H:%M(?::%S)?. For a list of valid format directives, see Python library documentation for strptime behaviour.
.br
The following are acceptable format fields (see strptime(3) for descriptions):
.nf
%% %a %A %b %B %d %H %I %j %m %M %p %S %U %w %W %y %Y
.fi
.br
Also, special values of \fIEpoch\fR (UNIX Timestamp), \fITAI64N\fR and \fIISO8601\fR can be used.
.br
\fBNOTE:\fR due to config file string substitution, %'s must be escaped by a % in config files.
.TP
\fBjournalmatch\fR
specifies the systemd journal match used to filter the journal entries. See \fBjournalctl(1)\fR and \fBsystemd.journal-fields(7)\fR for matches syntax and more details on special journal fields. This option is only valid for the \fIsystemd\fR backend.