mirror of https://github.com/fail2ban/fail2ban
performance optimization of `datepattern` (better search algorithm);
datetemplate: improved anchor detection for capturing groups `(^...)`; introduced new prefix `{UNB}` for `datepattern` to disable word boundaries in regex; datedetector: speedup special case if only one template is defined (every match wins - no collision, no sorting, no other best match possible)pull/2651/head
parent
2e42b98cd3
commit
4766547e1f
|
@ -37,6 +37,9 @@ ver. 0.10.6-dev (20??/??/??) - development edition
|
|||
### New Features
|
||||
|
||||
### Enhancements
|
||||
* introduced new prefix `{UNB}` for `datepattern` to disable word boundaries in regex;
|
||||
* datetemplate: improved anchor detection for capturing groups `(^...)`;
|
||||
* performance optimization of `datepattern` (better search algorithm in datedetector, especially for single template);
|
||||
|
||||
|
||||
ver. 0.10.5 (2020/01/10) - deserve-more-respect-a-jedis-weapon-must
|
||||
|
|
|
@ -337,64 +337,75 @@ class DateDetector(object):
|
|||
# if no templates specified - default templates should be used:
|
||||
if not len(self.__templates):
|
||||
self.addDefaultTemplate()
|
||||
logSys.log(logLevel-1, "try to match time for line: %.120s", line)
|
||||
match = None
|
||||
log = logSys.log if logSys.getEffectiveLevel() <= logLevel else lambda *args: None
|
||||
log(logLevel-1, "try to match time for line: %.120s", line)
|
||||
|
||||
# first try to use last template with same start/end position:
|
||||
match = None
|
||||
found = None, 0x7fffffff, 0x7fffffff, -1
|
||||
ignoreBySearch = 0x7fffffff
|
||||
i = self.__lastTemplIdx
|
||||
if i < len(self.__templates):
|
||||
ddtempl = self.__templates[i]
|
||||
template = ddtempl.template
|
||||
if template.flags & (DateTemplate.LINE_BEGIN|DateTemplate.LINE_END):
|
||||
if logSys.getEffectiveLevel() <= logLevel-1: # pragma: no cover - very-heavy debug
|
||||
logSys.log(logLevel-1, " try to match last anchored template #%02i ...", i)
|
||||
log(logLevel-1, " try to match last anchored template #%02i ...", i)
|
||||
match = template.matchDate(line)
|
||||
ignoreBySearch = i
|
||||
else:
|
||||
distance, endpos = self.__lastPos[0], self.__lastEndPos[0]
|
||||
if logSys.getEffectiveLevel() <= logLevel-1:
|
||||
logSys.log(logLevel-1, " try to match last template #%02i (from %r to %r): ...%r==%r %s %r==%r...",
|
||||
log(logLevel-1, " try to match last template #%02i (from %r to %r): ...%r==%r %s %r==%r...",
|
||||
i, distance, endpos,
|
||||
line[distance-1:distance], self.__lastPos[1],
|
||||
line[distance:endpos],
|
||||
line[endpos:endpos+1], self.__lastEndPos[1])
|
||||
# check same boundaries left/right, otherwise possible collision/pattern switch:
|
||||
if (line[distance-1:distance] == self.__lastPos[1] and
|
||||
line[endpos:endpos+1] == self.__lastEndPos[1]
|
||||
):
|
||||
line[endpos:endpos+1], self.__lastEndPos[2])
|
||||
# check same boundaries left/right, outside fully equal, inside only if not alnum (e. g. bound RE
|
||||
# with space or some special char), otherwise possible collision/pattern switch:
|
||||
if ((
|
||||
line[distance-1:distance] == self.__lastPos[1] or
|
||||
(line[distance] == self.__lastPos[2] and not self.__lastPos[2].isalnum())
|
||||
) and (
|
||||
line[endpos:endpos+1] == self.__lastEndPos[2] or
|
||||
(line[endpos-1] == self.__lastEndPos[1] and not self.__lastEndPos[1].isalnum())
|
||||
)):
|
||||
# search in line part only:
|
||||
log(logLevel-1, " boundaries are correct, search in part %r", line[distance:endpos])
|
||||
match = template.matchDate(line, distance, endpos)
|
||||
else:
|
||||
log(logLevel-1, " boundaries show conflict, try whole search")
|
||||
match = template.matchDate(line)
|
||||
ignoreBySearch = i
|
||||
if match:
|
||||
distance = match.start()
|
||||
endpos = match.end()
|
||||
# if different position, possible collision/pattern switch:
|
||||
if (
|
||||
len(self.__templates) == 1 or # single template:
|
||||
template.flags & (DateTemplate.LINE_BEGIN|DateTemplate.LINE_END) or
|
||||
(distance == self.__lastPos[0] and endpos == self.__lastEndPos[0])
|
||||
):
|
||||
logSys.log(logLevel, " matched last time template #%02i", i)
|
||||
log(logLevel, " matched last time template #%02i", i)
|
||||
else:
|
||||
logSys.log(logLevel, " ** last pattern collision - pattern change, search ...")
|
||||
log(logLevel, " ** last pattern collision - pattern change, reserve & search ...")
|
||||
found = match, distance, endpos, i; # save current best alternative
|
||||
match = None
|
||||
else:
|
||||
logSys.log(logLevel, " ** last pattern not found - pattern change, search ...")
|
||||
log(logLevel, " ** last pattern not found - pattern change, search ...")
|
||||
# search template and better match:
|
||||
if not match:
|
||||
logSys.log(logLevel, " search template (%i) ...", len(self.__templates))
|
||||
found = None, 0x7fffffff, 0x7fffffff, -1
|
||||
log(logLevel, " search template (%i) ...", len(self.__templates))
|
||||
i = 0
|
||||
for ddtempl in self.__templates:
|
||||
if logSys.getEffectiveLevel() <= logLevel-1:
|
||||
logSys.log(logLevel-1, " try template #%02i: %s", i, ddtempl.name)
|
||||
if i == ignoreBySearch:
|
||||
i += 1
|
||||
continue
|
||||
log(logLevel-1, " try template #%02i: %s", i, ddtempl.name)
|
||||
template = ddtempl.template
|
||||
match = template.matchDate(line)
|
||||
if match:
|
||||
distance = match.start()
|
||||
endpos = match.end()
|
||||
if logSys.getEffectiveLevel() <= logLevel:
|
||||
logSys.log(logLevel, " matched time template #%02i (at %r <= %r, %r) %s",
|
||||
log(logLevel, " matched time template #%02i (at %r <= %r, %r) %s",
|
||||
i, distance, ddtempl.distance, self.__lastPos[0], template.name)
|
||||
## last (or single) template - fast stop:
|
||||
if i+1 >= len(self.__templates):
|
||||
|
@ -408,7 +419,7 @@ class DateDetector(object):
|
|||
## [grave] if distance changed, possible date-match was found somewhere
|
||||
## in body of message, so save this template, and search further:
|
||||
if distance > ddtempl.distance or distance > self.__lastPos[0]:
|
||||
logSys.log(logLevel, " ** distance collision - pattern change, reserve")
|
||||
log(logLevel, " ** distance collision - pattern change, reserve")
|
||||
## shortest of both:
|
||||
if distance < found[1]:
|
||||
found = match, distance, endpos, i
|
||||
|
@ -422,7 +433,7 @@ class DateDetector(object):
|
|||
# check other template was found (use this one with shortest distance):
|
||||
if not match and found[0]:
|
||||
match, distance, endpos, i = found
|
||||
logSys.log(logLevel, " use best time template #%02i", i)
|
||||
log(logLevel, " use best time template #%02i", i)
|
||||
ddtempl = self.__templates[i]
|
||||
template = ddtempl.template
|
||||
# we've winner, incr hits, set distance, usage, reorder, etc:
|
||||
|
@ -432,8 +443,8 @@ class DateDetector(object):
|
|||
ddtempl.distance = distance
|
||||
if self.__firstUnused == i:
|
||||
self.__firstUnused += 1
|
||||
self.__lastPos = distance, line[distance-1:distance]
|
||||
self.__lastEndPos = endpos, line[endpos:endpos+1]
|
||||
self.__lastPos = distance, line[distance-1:distance], line[distance]
|
||||
self.__lastEndPos = endpos, line[endpos-1], line[endpos:endpos+1]
|
||||
# if not first - try to reorder current template (bubble up), they will be not sorted anymore:
|
||||
if i and i != self.__lastTemplIdx:
|
||||
i = self._reorderTemplate(i)
|
||||
|
@ -442,7 +453,7 @@ class DateDetector(object):
|
|||
return (match, template)
|
||||
|
||||
# not found:
|
||||
logSys.log(logLevel, " no template.")
|
||||
log(logLevel, " no template.")
|
||||
return (None, None)
|
||||
|
||||
@property
|
||||
|
|
|
@ -36,15 +36,16 @@ logSys = getLogger(__name__)
|
|||
RE_GROUPED = re.compile(r'(?<!(?:\(\?))(?<!\\)\((?!\?)')
|
||||
RE_GROUP = ( re.compile(r'^((?:\(\?\w+\))?\^?(?:\(\?\w+\))?)(.*?)(\$?)$'), r"\1(\2)\3" )
|
||||
|
||||
RE_EXLINE_NO_BOUNDS = re.compile(r'^\{UNB\}')
|
||||
RE_EXLINE_BOUND_BEG = re.compile(r'^\{\^LN-BEG\}')
|
||||
RE_EXSANC_BOUND_BEG = re.compile(r'^\(\?:\^\|\\b\|\\W\)')
|
||||
RE_EXSANC_BOUND_BEG = re.compile(r'^\((?:\?:)?\^\|\\b\|\\W\)')
|
||||
RE_EXEANC_BOUND_BEG = re.compile(r'\(\?=\\b\|\\W\|\$\)$')
|
||||
RE_NO_WRD_BOUND_BEG = re.compile(r'^\(*(?:\(\?\w+\))?(?:\^|\(*\*\*|\(\?:\^)')
|
||||
RE_NO_WRD_BOUND_BEG = re.compile(r'^\(*(?:\(\?\w+\))?(?:\^|\(*\*\*|\((?:\?:)?\^)')
|
||||
RE_NO_WRD_BOUND_END = re.compile(r'(?<!\\)(?:\$\)?|\\b|\\s|\*\*\)*)$')
|
||||
RE_DEL_WRD_BOUNDS = ( re.compile(r'^\(*(?:\(\?\w+\))?\(*\*\*|(?<!\\)\*\*\)*$'),
|
||||
lambda m: m.group().replace('**', '') )
|
||||
|
||||
RE_LINE_BOUND_BEG = re.compile(r'^(?:\(\?\w+\))?(?:\^|\(\?:\^(?!\|))')
|
||||
RE_LINE_BOUND_BEG = re.compile(r'^(?:\(\?\w+\))?(?:\^|\((?:\?:)?\^(?!\|))')
|
||||
RE_LINE_BOUND_END = re.compile(r'(?<![\\\|])(?:\$\)?)$')
|
||||
|
||||
RE_ALPHA_PATTERN = re.compile(r'(?<!\%)\%[aAbBpc]')
|
||||
|
@ -119,7 +120,7 @@ class DateTemplate(object):
|
|||
if boundBegin:
|
||||
self.flags |= DateTemplate.WORD_BEGIN if wordBegin != 'start' else DateTemplate.LINE_BEGIN
|
||||
if wordBegin != 'start':
|
||||
regex = r'(?:^|\b|\W)' + regex
|
||||
regex = r'(?=^|\b|\W)' + regex
|
||||
else:
|
||||
regex = r"^(?:\W{0,2})?" + regex
|
||||
if not self.name.startswith('{^LN-BEG}'):
|
||||
|
@ -128,8 +129,10 @@ class DateTemplate(object):
|
|||
if boundEnd:
|
||||
self.flags |= DateTemplate.WORD_END
|
||||
regex += r'(?=\b|\W|$)'
|
||||
if RE_LINE_BOUND_BEG.search(regex): self.flags |= DateTemplate.LINE_BEGIN
|
||||
if RE_LINE_BOUND_END.search(regex): self.flags |= DateTemplate.LINE_END
|
||||
if not (self.flags & DateTemplate.LINE_BEGIN) and RE_LINE_BOUND_BEG.search(regex):
|
||||
self.flags |= DateTemplate.LINE_BEGIN
|
||||
if not (self.flags & DateTemplate.LINE_END) and RE_LINE_BOUND_END.search(regex):
|
||||
self.flags |= DateTemplate.LINE_END
|
||||
# remove possible special pattern "**" in front and end of regex:
|
||||
regex = RE_DEL_WRD_BOUNDS[0].sub(RE_DEL_WRD_BOUNDS[1], regex)
|
||||
self._regex = regex
|
||||
|
@ -188,7 +191,7 @@ class DateTemplate(object):
|
|||
def unboundPattern(pattern):
|
||||
return RE_EXEANC_BOUND_BEG.sub('',
|
||||
RE_EXSANC_BOUND_BEG.sub('',
|
||||
RE_EXLINE_BOUND_BEG.sub('', pattern)
|
||||
RE_EXLINE_BOUND_BEG.sub('', RE_EXLINE_NO_BOUNDS.sub('', pattern))
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -297,6 +300,10 @@ class DatePatternRegex(DateTemplate):
|
|||
def setRegex(self, pattern, wordBegin=True, wordEnd=True):
|
||||
# original pattern:
|
||||
self._pattern = pattern
|
||||
# if unbound signalled - reset boundaries left and right:
|
||||
if RE_EXLINE_NO_BOUNDS.search(pattern):
|
||||
pattern = RE_EXLINE_NO_BOUNDS.sub('', pattern)
|
||||
wordBegin = wordEnd = False
|
||||
# if explicit given {^LN-BEG} - remove it from pattern and set 'start' in wordBegin:
|
||||
if wordBegin and RE_EXLINE_BOUND_BEG.search(pattern):
|
||||
pattern = RE_EXLINE_BOUND_BEG.sub('', pattern)
|
||||
|
|
|
@ -330,6 +330,27 @@ class DateDetectorTest(LogCaptureTestCase):
|
|||
dt = '2005 Jun 03'; self.assertEqual(t.matchDate(dt).group(1), dt)
|
||||
dt = '2005 JUN 03'; self.assertEqual(t.matchDate(dt).group(1), dt)
|
||||
|
||||
def testNotAnchoredCollision(self):
|
||||
# try for patterns with and without word boundaries:
|
||||
for dp in (r'%H:%M:%S', r'{UNB}%H:%M:%S'):
|
||||
dd = DateDetector()
|
||||
dd.appendTemplate(dp)
|
||||
# boundary of timestamp changes right and left (and time is left and right in line):
|
||||
for fmt in ('%s test', '%8s test', 'test %s', 'test %8s'):
|
||||
for dt in (
|
||||
'00:01:02',
|
||||
'00:01:2',
|
||||
'00:1:2',
|
||||
'0:1:2',
|
||||
'00:1:2',
|
||||
'00:01:2',
|
||||
'00:01:02',
|
||||
'0:1:2',
|
||||
'00:01:02',
|
||||
):
|
||||
t = dd.getTime(fmt % dt)
|
||||
self.assertEqual((t[0], t[1].group()), (1123970462.0, dt))
|
||||
|
||||
def testAmbiguousInOrderedTemplates(self):
|
||||
dd = self.datedetector
|
||||
for (debit, line, cnt) in (
|
||||
|
|
Loading…
Reference in New Issue