[temp commit] 1st try to optimize datedetector/datetemplate functionality (fixes ambiguous or misleading date detection if several date formats are used in a log, or the format switches after a restart of some services):

* Misleading date patterns defined more precisely (using the extended syntax %E[mdHMS]
  for an exact two-digit match; see the sketch after this list)
* `filter.d/freeswitch.conf`
    - Optional prefixes (server, daemon, dual time) if systemd daemon logs are used (gh-1548)
    - User part rewritten to accept an IPv6 address or a domain after "@" (gh-1548)
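As a quick illustration of the ambiguity being addressed (a standalone sketch, not part of the patch): a loose two-digit month group happily accepts a day-first date, while an exact %Em-style group (01..12) rejects it:

import re

line = "23/01/05 21:59:59"                        # day-first date from the default templates
loose_mdy = re.compile(r"(?P<m>\d{2})/(?P<d>\d{2})/(?P<y>\d{2})")
exact_mdy = re.compile(r"(?P<m>1[0-2]|0[1-9])/(?P<d>\d{2})/(?P<y>\d{2})")
print(bool(loose_mdy.match(line)))                # True  -> "23" wrongly accepted as a month
print(bool(exact_mdy.match(line)))                # False -> exact two-digit month (as with %Em) rejects it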
pull/1583/head
sebres 2016-09-27 17:53:45 +02:00
parent 10bdadaef2
commit a7d9de8c52
6 changed files with 181 additions and 98 deletions

View File

@@ -8,10 +8,21 @@
# IP addresses on your LAN.
#
[INCLUDES]
# Read common prefixes. If any customizations available -- read them from
# common.local
before = common.conf
[Definition]
failregex = ^\.\d+ \[WARNING\] sofia_reg\.c:\d+ SIP auth (failure|challenge) \((REGISTER|INVITE)\) on sofia profile \'[^']+\' for \[.*\] from ip <HOST>$
^\.\d+ \[WARNING\] sofia_reg\.c:\d+ Can't find user \[\d+@\d+\.\d+\.\d+\.\d+\] from <HOST>$
_daemon = freeswitch
# Prefix contains the common prefix line (server, daemon, etc.) and 2 datetimes if the systemd backend is used
_pref_line = ^%(__prefix_line)s(?:\d+-\d+-\d+ \d+:\d+:\d+\.\d+)?
failregex = %(_pref_line)s \[WARNING\] sofia_reg\.c:\d+ SIP auth (failure|challenge) \((REGISTER|INVITE)\) on sofia profile \'[^']+\' for \[[^\]]*\] from ip <HOST>$
%(_pref_line)s \[WARNING\] sofia_reg\.c:\d+ Can't find user \[[^@]+@[^\]]+\] from <HOST>$
ignoreregex =
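A rough self-check of the new expressions against the dual-time systemd sample added to the test log at the end of this diff. %(__prefix_line)s really comes from common.conf, so a simplified, hypothetical stand-in is used here, and fail2ban's <HOST> tag is replaced by a plain named group:

import re

prefix_line = r"(?:\S+ )?freeswitch\[\d+\]:\s+"            # hypothetical stand-in for %(__prefix_line)s
pref_line = r"^" + prefix_line + r"(?:\d+-\d+-\d+ \d+:\d+:\d+\.\d+)?"
failregex = re.compile(pref_line +
    r" \[WARNING\] sofia_reg\.c:\d+ Can't find user \[[^@]+@[^\]]+\] from (?P<host>\S+)$")

# sample from the freeswitch test log, with the leading (already detected) timestamp stripped:
line = ("www.srv.tld freeswitch[122921]: 2016-09-25 18:57:58.150982 "
        "[WARNING] sofia_reg.c:2889 Can't find user [201@::1] from 192.0.2.1")
m = failregex.search(line)
print(m.group("host") if m else None)                      # 192.0.2.1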

View File

@@ -21,6 +21,7 @@ __author__ = "Cyril Jaquier and Fail2Ban Contributors"
__copyright__ = "Copyright (c) 2004 Cyril Jaquier"
__license__ = "GPL"
import copy
import time
from threading import Lock
@@ -35,6 +36,8 @@ logLevel = 6
class DateDetectorCache(object):
"""Implements the caching of the default templates list.
"""
def __init__(self):
self.__lock = Lock()
self.__templates = list()
@@ -51,26 +54,44 @@ class DateDetectorCache(object):
def _cacheTemplate(self, template):
"""Cache Fail2Ban's default template.
"""
if isinstance(template, str):
# exact given template with word begin-end boundary:
template = DatePatternRegex(template)
# additional template that prefers a datetime at the start of a line (safety+performance feature):
template2 = copy.copy(template)
if hasattr(template, 'pattern'):
regex = template.pattern
wordEnd = True
else:
regex = template.regex
wordEnd = False
template2.setRegex(regex, wordBegin='start', wordEnd=wordEnd)
if template2.name != template.name:
self.__templates.append(template2)
# add template:
self.__templates.append(template)
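The effect of the extra line-anchored copy can be pictured with a standalone sketch (simplified regexes, not the expanded fail2ban patterns): the anchored variant is tried as well and wins whenever the timestamp really leads the line.

import re

date_re = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"           # stand-in for an expanded date pattern
anchored = ("LN-BEG ISO", r"^" + date_re)                  # cheap: only at line begin
boundary = ("WD-BEG ISO", r"(?:^|(?<=\W))" + date_re)      # anywhere on a word boundary
for line in ("2005-01-23 21:59:59 sshd: fail",             # timestamp leads the line
             "host sshd: 2005-01-23 21:59:59 fail"):       # timestamp inside the line
    for name, rx in (anchored, boundary):
        if re.search(rx, line):
            print(line, "->", name)                        # first matching template wins
            break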
def _addDefaultTemplate(self):
"""Add (i.e. cache) Fail2Ban's default set of date templates.
"""
# ISO 8601, simple date, optional subsecond and timezone:
# 2005-01-23T21:59:59.981746, 2005-01-23 21:59:59
# simple date: 2005/01/23 21:59:59
# custom for syslog-ng 2006.12.21 06:43:20
self._cacheTemplate("%Y(?P<_sep>[-/.])%m(?P=_sep)%d[T ]%H:%M:%S(?:[.,]%f)?(?:\s*%z)?")
# 20050123T215959, 20050123 215959
self._cacheTemplate("%Y%Em%Ed[T ]%EH%EM%ES(?:[.,]%f)?(?:\s*%z)?")
# asctime with optional day, subsecond and/or year:
# Sun Jan 23 21:59:59.011 2005
self._cacheTemplate("(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %Y)?")
# prefixed with optional time zone (monit):
# PDT Apr 16 21:05:29
self._cacheTemplate("(?:%z )?(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %Y)?")
# asctime with optional day, subsecond and/or year coming after day
# http://bugs.debian.org/798923
# Sun Jan 23 2005 21:59:59.011
self._cacheTemplate("(?:%a )?%b %d %Y %H:%M:%S(?:\.%f)?")
# simple date, optional subsecond (proftpd):
# 2005-01-23 21:59:59
# simple date: 2005/01/23 21:59:59
# custom for syslog-ng 2006.12.21 06:43:20
self._cacheTemplate("%Y(?P<_sep>[-/.])%m(?P=_sep)%d %H:%M:%S(?:,%f)?")
# simple date too (from x11vnc): 23/01/2005 21:59:59
# and with optional year given by 2 digits: 23/01/05 21:59:59
# (See http://bugs.debian.org/537610)
@@ -79,37 +100,48 @@ class DateDetectorCache(object):
# Apache format optional time zone:
# [31/Oct/2006:09:22:55 -0000]
# 26-Jul-2007 15:20:52
# named 26-Jul-2007 15:20:52.252
# roundcube 26-Jul-2007 15:20:52 +0200
self._cacheTemplate("%d(?P<_sep>[-/])%b(?P=_sep)%Y[ :]?%H:%M:%S(?:\.%f)?(?: %z)?")
# CPanel 05/20/2008:01:57:39
self._cacheTemplate("%m/%d/%Y:%H:%M:%S")
# named 26-Jul-2007 15:20:52.252
# roundcube 26-Jul-2007 15:20:52 +0200
# 01-27-2012 16:22:44.252
# subseconds explicit to avoid possible %m<->%d confusion
# with previous
self._cacheTemplate("%m-%d-%Y %H:%M:%S\.%f")
# with previous ("%d-%m-%Y %H:%M:%S" by "%d(?P<_sep>[-/])%m(?P=_sep)(?:%Y|%y) %H:%M:%S")
self._cacheTemplate("%m-%d-%Y %H:%M:%S(?:\.%f)?")
# TAI64N
template = DateTai64n()
template.name = "TAI64N"
self._cacheTemplate(template)
self._cacheTemplate(DateTai64n())
# Epoch
template = DateEpoch()
template.name = "Epoch"
self._cacheTemplate(template)
# ISO 8601
self._cacheTemplate("%Y-%m-%d[T ]%H:%M:%S(?:\.%f)?(?:%z)?")
self._cacheTemplate(DateEpoch())
# Only time information in the log
self._cacheTemplate("^%H:%M:%S")
# <09/16/08@05:03:30>
self._cacheTemplate("^<%m/%d/%y@%H:%M:%S>")
# MySQL: 130322 11:46:11
self._cacheTemplate("^%y%m%d ?%H:%M:%S")
self._cacheTemplate("%y%Em%Ed ?%H:%M:%S")
# Apache Tomcat
self._cacheTemplate("%b %d, %Y %I:%M:%S %p")
# ASSP: Apr-27-13 02:33:06
self._cacheTemplate("^%b-%d-%y %H:%M:%S")
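The (?P<_sep>...)(?P=_sep) construct used by several of the templates above can be checked in isolation (plain \d groups here instead of the %Y/%m/%d expansions): the backreference forces the same separator on both sides.

import re

iso = re.compile(r"\d{4}(?P<_sep>[-/.])\d{2}(?P=_sep)\d{2}[T ]\d{2}:\d{2}:\d{2}")
for s in ("2005-01-23T21:59:59", "2005/01/23 21:59:59",
          "2006.12.21 06:43:20", "2005-01/23 21:59:59"):
    print(s, bool(iso.match(s)))                           # last one is False: mixed separators rejected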
class DateDetectorTemplate(object):
"""Used for "shallow copy" of the template object.
Prevents collective usage of hits/lastUsed in cached templates
"""
__slots__ = ('template', 'hits', 'lastUsed')
def __init__(self, template):
self.template = template
self.hits = 0
self.lastUsed = 0
def __getattr__(self, name):
""" Returns attribute of template (called for parameters not in slots)
"""
return getattr(self.template, name)
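The wrapper can be seen in isolation (illustrative class names, not fail2ban code): the per-detector counters live in the slots, everything else is read through to the shared cached template.

class _SharedTemplate(object):                 # hypothetical shared (cached) template
    name = "ISO8601"
    regex = r"\d{4}-\d{2}-\d{2}"

class _PerDetector(object):
    __slots__ = ('template', 'hits', 'lastUsed')
    def __init__(self, template):
        self.template, self.hits, self.lastUsed = template, 0, 0
    def __getattr__(self, name):               # only called for names not found in __slots__
        return getattr(self.template, name)

shared = _SharedTemplate()
a, b = _PerDetector(shared), _PerDetector(shared)
a.hits += 1
print(a.hits, b.hits, a.name, b.regex)         # 1 0 ISO8601 \d{4}-\d{2}-\d{2}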
class DateDetector(object):
"""Manages one or more date templates to find a date within a log line.
@@ -132,7 +164,7 @@ class DateDetector(object):
raise ValueError(
"There is already a template with name %s" % name)
self.__known_names.add(name)
self.__templates.append(template)
self.__templates.append(DateDetectorTemplate(template))
def appendTemplate(self, template):
"""Add a date template to manage and use in search of dates.
@@ -186,13 +218,14 @@ class DateDetector(object):
"""
i = 0
with self.__lock:
for template in self.__templates:
for ddtemplate in self.__templates:
template = ddtemplate.template
match = template.matchDate(line)
if not match is None:
if logSys.getEffectiveLevel() <= logLevel:
logSys.log(logLevel, "Matched time template %s", template.name)
template.hits += 1
template.lastUsed = time.time()
ddtemplate.hits += 1
ddtemplate.lastUsed = time.time()
# if not first - try to reorder current template (bubble up); they will not be sorted anymore:
if i:
self._reorderTemplate(i)
@@ -234,7 +267,8 @@ class DateDetector(object):
except ValueError:
return None
with self.__lock:
for template in self.__templates:
for ddtemplate in self.__templates:
template = ddtemplate.template
try:
date = template.getDate(line)
if date is None:
@@ -261,14 +295,21 @@ class DateDetector(object):
## current hits and time the template was long unused:
untime = template.lastUsed - self.__unusedTime
hits = template.hits
## try to move faster (first 2 positions if still unused, or halfway up to the current template position):
phits = 0
for pos in (0, 1, num // 2):
phits = templates[pos].hits
if not phits:
break
## don't move too often (multi-line logs or logs with different date patterns);
## if the template at the target position was unused for too long, replace it as well:
if hits > templates[num-1].hits + 5 or templates[num-1].lastUsed < untime:
## try to move faster (halfway up to the current template):
pos = num // 2
## if not larger - move slowly (exactly 1 position):
if hits <= templates[pos].hits or templates[pos].lastUsed < untime:
if not phits or hits > phits + 5 or templates[pos].lastUsed < untime:
## if not larger (and target position recently used) - move slowly (exactly 1 position):
if hits <= phits and templates[pos].lastUsed > untime:
pos = num-1
## if still smaller and template at position used, don't move:
if hits < templates[pos].hits and templates[pos].lastUsed > untime:
return
templates[pos], templates[num] = template, templates[pos]
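In very rough terms the reordering behaves like the much-simplified sketch below (hit counts only; the actual heuristic above additionally weighs lastUsed before deciding to swap or to stay):

def bubble_up(templates, num, margin=5):
    hits = templates[num].hits
    pos = num // 2                                   # try to move fast: halfway up
    if hits <= templates[pos].hits + margin:
        pos = num - 1                                # not clearly better: creep up one slot
    if hits <= templates[pos].hits:
        return                                       # still not better: keep the order
    templates[pos], templates[num] = templates[num], templates[pos]

class _T(object):                                    # hits-only dummy
    def __init__(self, hits): self.hits = hits
ts = [_T(50), _T(3), _T(2), _T(20)]
bubble_up(ts, 3)
print([t.hits for t in ts])                          # [50, 20, 2, 3] -> index 3 bubbled up to num // 2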

View File

@@ -27,7 +27,7 @@ __license__ = "GPL"
import re
from abc import abstractmethod
from .strptime import reGroupDictStrptime, timeRE
from .strptime import reGroupDictStrptime, timeRE, getTimePatternRE
from ..helpers import getLogger
logSys = getLogger(__name__)
@@ -49,8 +49,6 @@ class DateTemplate(object):
self._name = ""
self._regex = ""
self._cRegex = None
self.hits = 0
self.lastUsed = 0
@property
def name(self):
@@ -88,9 +86,11 @@
"""
regex = regex.strip()
if wordBegin and not re.search(r'^\^', regex):
regex = r'(?=^|\b|\W)' + regex
regex = (r'(?<=^|\b)' if wordBegin != 'start' else r"^(?<=\W)?") + regex
self._name = ('[*WD-BEG]' if wordBegin != 'start' else '[^LN-BEG]') + self._name
if wordEnd and not re.search(r'\$$', regex):
regex += r'(?=\b|\W|$)'
self._name += ('[*WD-END]' if wordEnd else '')
self._regex = regex
regex = property(getRegex, setRegex, doc=
@@ -140,7 +140,9 @@ class DateEpoch(DateTemplate):
def __init__(self):
DateTemplate.__init__(self)
self.regex = r"(?:^|(?P<square>(?<=^\[))|(?P<selinux>(?<=audit\()))\d{10,11}\b(?:\.\d{3,6})?(?:(?(selinux)(?=:\d+\)))|(?(square)(?=\])))"
self.name = "Epoch"
self.setRegex(r"(?:^|(?P<square>(?<=^\[))|(?P<selinux>(?<=\baudit\()))\d{10,11}\b(?:\.\d{3,6})?(?:(?(selinux)(?=:\d+\)))|(?(square)(?=\])))",
wordBegin=False) ;# already anchored at line begin or word begin
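The bracketed forms from the test case exercise the conditional groups of this pattern; in both cases group() should return just the epoch value:

import re

epoch_re = re.compile(
    r"(?:^|(?P<square>(?<=^\[))|(?P<selinux>(?<=\baudit\()))"
    r"\d{10,11}\b(?:\.\d{3,6})?"
    r"(?:(?(selinux)(?=:\d+\)))|(?(square)(?=\])))")
for line in ("[1106513999.000] sshd: fail",
             "audit(1106513999.000:987): denied"):
    print(epoch_re.search(line).group())             # 1106513999.000 in both cases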
def getDate(self, line, dateMatch=None):
"""Method to return the date for a log line.
@@ -178,7 +180,8 @@ class DatePatternRegex(DateTemplate):
regex
pattern
"""
_patternRE = re.compile(r"%%(%%|[%s])" % "".join(timeRE.keys()))
_patternRE = re.compile(getTimePatternRE())
_patternName = {
'a': "DAY", 'A': "DAYNAME", 'b': "MON", 'B': "MONTH", 'd': "Day",
'H': "24hour", 'I': "12hour", 'j': "Yearday", 'm': "Month",
@@ -188,11 +191,11 @@
for _key in set(timeRE) - set(_patternName): # may not have them all...
_patternName[_key] = "%%%s" % _key
def __init__(self, pattern=None):
def __init__(self, pattern=None, **kwargs):
super(DatePatternRegex, self).__init__()
self._pattern = None
if pattern is not None:
self.pattern = pattern
self.setRegex(pattern, **kwargs)
@property
def pattern(self):
@@ -208,17 +211,13 @@
@pattern.setter
def pattern(self, pattern):
self.setRegex(pattern)
def setRegex(self, pattern, wordBegin=True, wordEnd=True):
self._pattern = pattern
fmt = self._patternRE.sub(r'%(\1)s', pattern)
self._name = fmt % self._patternName
super(DatePatternRegex, self).setRegex(fmt % timeRE)
def setRegex(self, value):
raise NotImplementedError("Regex derived from pattern")
@DateTemplate.name.setter
def name(self, value):
raise NotImplementedError("Name derived from pattern")
super(DatePatternRegex, self).setRegex(fmt % timeRE, wordBegin, wordEnd)
def getDate(self, line, dateMatch=None):
"""Method to return the date for a log line.
@@ -258,6 +257,7 @@ class DateTai64n(DateTemplate):
def __init__(self):
DateTemplate.__init__(self)
self.name = "TAI64N"
# We already know the format for TAI64N
# yoh: we should not add an additional front anchor
self.setRegex("@[0-9a-f]{24}", wordBegin=False)
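For reference, the TAI64N sample used in the test case decodes to the expected epoch with a naive conversion (illustration only, not the DateTai64n implementation; leap-second offsets are ignored):

label = "@4000000041f4104f00000000"                  # sample from datedetectortestcase
seconds = int(label[1:17], 16) - 0x4000000000000000  # TAI64 label -> seconds
nanos = int(label[17:25], 16)
print(seconds + nanos / 1e9)                         # 1106513999.0, the test's dateUnix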

View File

@@ -26,8 +26,24 @@ from .mytime import MyTime
locale_time = LocaleTime()
timeRE = TimeRE()
#todo: implement literal time zone support like CET, PST, PDT, etc (via pytz):
#timeRE['z'] = r"%s?(?P<z>Z|[+-]\d{2}(?::?[0-5]\d)?|[A-Z]{3})?" % timeRE['Z']
timeRE['z'] = r"(?P<z>Z|[+-]\d{2}(?::?[0-5]\d)?)"
# Extend built-in TimeRE with some exact (two-digit) patterns:
timeRE['Ed'] = r"(?P<d>3[0-1]|[1-2]\d|0[1-9])"
timeRE['Em'] = r"(?P<m>1[0-2]|0[1-9])"
timeRE['EH'] = r"(?P<H>2[0-3]|[0-1]\d)"
timeRE['EM'] = r"(?P<M>[0-5]\d)"
timeRE['ES'] = r"(?P<S>6[0-1]|[0-5]\d)"
def getTimePatternRE():
keys = timeRE.keys()
return (r"%%(%%|%s|[%s])" % (
"|".join([k for k in keys if len(k) > 1]),
"".join([k for k in keys if len(k) == 1]),
))
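What getTimePatternRE builds can be sketched with a toy subset of timeRE: the composed regex recognizes multi-letter directives such as %Em next to the single-letter ones, so DatePatternRegex can substitute each directive with its named group (the real code does this via %-dict formatting; a lambda is used here for brevity):

import re

toyRE = {                                            # toy subset, not the real TimeRE
    'Y': r"(?P<Y>\d\d\d\d)", 'm': r"(?P<m>\d\d?)",
    'Em': r"(?P<m>1[0-2]|0[1-9])", 'Ed': r"(?P<d>3[0-1]|[1-2]\d|0[1-9])",
}
patternRE = re.compile(r"%%(%%|%s|[%s])" % (
    "|".join(k for k in toyRE if len(k) > 1),        # multi-letter keys first (Em|Ed)
    "".join(k for k in toyRE if len(k) == 1)))       # then single letters ([Ym])
print(patternRE.sub(lambda m: toyRE.get(m.group(1), m.group(0)), "%Y%Em%Ed"))
# -> (?P<Y>\d\d\d\d)(?P<m>1[0-2]|0[1-9])(?P<d>3[0-1]|[1-2]\d|0[1-9])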
def reGroupDictStrptime(found_dict):
"""Return time from dictionary of strptime fields

View File

@@ -89,69 +89,79 @@ class DateDetectorTest(LogCaptureTestCase):
"""
dateUnix = 1106513999.0
for anchored, sdate in (
(False, "Jan 23 21:59:59"),
(False, "Sun Jan 23 21:59:59 2005"),
(False, "Sun Jan 23 21:59:59"),
(False, "Sun Jan 23 2005 21:59:59"),
(False, "2005/01/23 21:59:59"),
(False, "2005.01.23 21:59:59"),
(False, "23/01/2005 21:59:59"),
(False, "23/01/05 21:59:59"),
(False, "23/Jan/2005:21:59:59"),
(False, "23/Jan/2005:21:59:59 +0100"),
(False, "01/23/2005:21:59:59"),
(False, "2005-01-23 21:59:59"),
(False, "2005-01-23 21:59:59,000"), # proftpd
(False, "23-Jan-2005 21:59:59"),
(False, "23-Jan-2005 21:59:59.02"),
(False, "23-Jan-2005 21:59:59 +0100"),
(False, "23-01-2005 21:59:59"),
(True, "1106513999"), # Portsentry
(False, "01-23-2005 21:59:59.252"), # reported on f2b, causes Feb29 fix to break
(False, "@4000000041f4104f00000000"), # TAI64N
(False, "2005-01-23T20:59:59.252Z"), #ISO 8601 (UTC)
(False, "2005-01-23T15:59:59-05:00"), #ISO 8601 with TZ
(False, "2005-01-23T21:59:59"), #ISO 8601 no TZ, assume local
(True, "<01/23/05@21:59:59>"),
(True, "050123 21:59:59"), # MySQL
(True, "Jan-23-05 21:59:59"), # ASSP like
(False, "Jan 23, 2005 9:59:59 PM"), # Apache Tomcat
(True, "1106513999"), # Regular epoch
(True, "1106513999.000"), # Regular epoch with millisec
(False, "audit(1106513999.000:987)"), # SELinux
for anchored, bound, sdate, rdate in (
(False, True, "Jan 23 21:59:59", None),
(False, False, "Sun Jan 23 21:59:59 2005", None),
(False, False, "Sun Jan 23 21:59:59", None),
(False, False, "Sun Jan 23 2005 21:59:59", None),
(False, True, "2005/01/23 21:59:59", None),
(False, True, "2005.01.23 21:59:59", None),
(False, True, "23/01/2005 21:59:59", None),
(False, True, "23/01/05 21:59:59", None),
(False, True, "23/Jan/2005:21:59:59", None),
(False, True, "23/Jan/2005:21:59:59 +0100", None),
(False, True, "01/23/2005:21:59:59", None),
(False, True, "2005-01-23 21:59:59", None),
(False, True, "2005-01-23 21:59:59,000", None), # proftpd
(False, True, "23-Jan-2005 21:59:59", None),
(False, True, "23-Jan-2005 21:59:59.02", None),
(False, True, "23-Jan-2005 21:59:59 +0100", None),
(False, True, "23-01-2005 21:59:59", None),
(True, True, "1106513999", None), # Portsentry
(False, True, "01-23-2005 21:59:59.252", None), # reported on f2b, causes Feb29 fix to break
(False, False, "@4000000041f4104f00000000", None), # TAI64N
(False, True, "2005-01-23T20:59:59.252Z", None), #ISO 8601 (UTC)
(False, True, "2005-01-23T15:59:59-05:00", None), #ISO 8601 with TZ
(False, True, "2005-01-23 21:59:59", None), #ISO 8601 no TZ, assume local
(False, True, "20050123T215959", None), #Short ISO
(False, True, "20050123 215959", None), #Short ISO
(True, True, "<01/23/05@21:59:59>", None),
(False, True, "050123 21:59:59", None), # MySQL
(True, True, "Jan-23-05 21:59:59", None), # ASSP like
(False, True, "Jan 23, 2005 9:59:59 PM", None), # Apache Tomcat
(True, True, "1106513999", None), # Regular epoch
(True, True, "1106513999.000", None), # Regular epoch with millisec
(True, True, "[1106513999.000]", "1106513999.000"), # epoch squared
(False, True, "audit(1106513999.000:987)", "1106513999.000"), # SELinux
):
logSys.debug('== test %r', (anchored, bound, sdate))
for should_match, prefix in (
(True, ""),
(not anchored, "bogus-prefix "),
(False, "word-boundary")
):
for should_match, prefix in ((True, ""),
(not anchored, "bogus-prefix ")):
if rdate is None: rdate = sdate
log = prefix + sdate + "[sshd] error: PAM: Authentication failure"
# skip the word-boundary test if it is not allowed for this format:
if not bound and prefix == "word-boundary": continue
logSys.debug(' -- test %-5s for %r', should_match, log)
# with getTime:
logtime = self.__datedetector.getTime(log)
if should_match:
self.assertNotEqual(logtime, None, "getTime retrieved nothing: failure for %s, anchored: %r, log: %s" % ( sdate, anchored, log))
self.assertNotEqual(logtime, None,
"getTime retrieved nothing: failure for %s by prefix %r, anchored: %r, log: %s" % ( sdate, prefix, anchored, log))
( logUnix, logMatch ) = logtime
self.assertEqual(logUnix, dateUnix, "getTime comparison failure for %s: \"%s\" is not \"%s\"" % (sdate, logUnix, dateUnix))
if sdate.startswith('audit('):
# yes, special case, the group only matches the number
self.assertEqual(logMatch.group(), '1106513999.000')
else:
self.assertEqual(logMatch.group(), sdate)
self.assertEqual(logUnix, dateUnix,
"getTime comparison failure for %s: by prefix %r \"%s\" is not \"%s\"" % (sdate, prefix, logUnix, dateUnix))
self.assertEqual(logMatch.group(), rdate)
else:
self.assertEqual(logtime, None, "getTime should have not matched for %r Got: %s" % (sdate, logtime))
self.assertEqual(logtime, None,
"getTime should have not matched for %r by prefix %r Got: %s" % (sdate, prefix, logtime))
# with getTime(matchTime) - this combination used in filter:
matchTime = self.__datedetector.matchTime(log)
(timeMatch, template) = matchTime = self.__datedetector.matchTime(log)
logtime = self.__datedetector.getTime(log, matchTime)
logSys.debug(' -- found - %r', template.name if timeMatch else False)
if should_match:
self.assertNotEqual(logtime, None, "getTime retrieved nothing: failure for %s, anchored: %r, log: %s" % ( sdate, anchored, log))
self.assertNotEqual(logtime, None,
"getTime retrieved nothing: failure for %s by prefix %r, anchored: %r, log: %s" % ( sdate, prefix, anchored, log))
( logUnix, logMatch ) = logtime
self.assertEqual(logUnix, dateUnix, "getTime comparison failure for %s: \"%s\" is not \"%s\"" % (sdate, logUnix, dateUnix))
if sdate.startswith('audit('):
# yes, special case, the group only matches the number
self.assertEqual(logMatch.group(), '1106513999.000')
else:
self.assertEqual(logMatch.group(), sdate)
self.assertEqual(logUnix, dateUnix,
"getTime comparison failure for %s by prefix %r: \"%s\" is not \"%s\"" % (sdate, prefix, logUnix, dateUnix))
self.assertEqual(logMatch.group(), rdate)
else:
self.assertEqual(logtime, None, "getTime should have not matched for %r Got: %s" % (sdate, logtime))
self.assertEqual(logtime, None,
"getTime should have not matched for %r by prefix %r Got: %s" % (sdate, prefix, logtime))
logSys.debug(' -- OK')
def testAllUniqueTemplateNames(self):
self.assertRaises(ValueError, self.__datedetector.appendTemplate,

View File

@@ -9,3 +9,8 @@
2013-12-31 17:39:54.767815 [WARNING] sofia_reg.c:2531 Can't find user [1001@192.168.2.51] from 5.11.47.236
# failJSON: { "time": "2013-12-31T17:39:54", "match": true, "host": "185.24.234.141" }
2013-12-31 17:39:54.767815 [WARNING] sofia_reg.c:2531 Can't find user [100@192.168.2.51] from 185.24.234.141
# failJSON: { "time": "2016-09-25T18:57:58", "match": true, "host": "192.0.2.1", "desc": "Systemd dual time with prefix - 1st expr" }
2016-09-25T18:57:58.150982 www.srv.tld freeswitch[122921]: 2016-09-25 18:57:58.150982 [WARNING] sofia_reg.c:2889 Can't find user [201@::1] from 192.0.2.1
# failJSON: { "time": "2016-09-25T18:57:58", "match": true, "host": "192.0.2.2", "desc": "Systemd dual time with prefix - 2nd expr" }
2016-09-25T18:57:58.150982 www.srv.tld freeswitch[122921]: 2016-09-25 18:57:58.150982 [WARNING] sofia_reg.c:1720 SIP auth failure (INVITE) on sofia profile 'sipinterface_1' for [9810972597751739@::1] from ip 192.0.2.2