mirror of https://github.com/fail2ban/fail2ban
performance optimization of `datepattern` (better search algorithm);
datetemplate: improved anchor detection for capturing groups `(^...)`; introduced new prefix `{UNB}` for `datepattern` to disable word boundaries in regex; datedetector: speedup special case if only one template is defined (every match wins - no collision, no sorting, no other best match possible)pull/2651/head
parent
2e42b98cd3
commit
4766547e1f
|
@ -37,6 +37,9 @@ ver. 0.10.6-dev (20??/??/??) - development edition
|
||||||
### New Features
|
### New Features
|
||||||
|
|
||||||
### Enhancements
|
### Enhancements
|
||||||
|
* introduced new prefix `{UNB}` for `datepattern` to disable word boundaries in regex;
|
||||||
|
* datetemplate: improved anchor detection for capturing groups `(^...)`;
|
||||||
|
* performance optimization of `datepattern` (better search algorithm in datedetector, especially for single template);
|
||||||
|
|
||||||
|
|
||||||
ver. 0.10.5 (2020/01/10) - deserve-more-respect-a-jedis-weapon-must
|
ver. 0.10.5 (2020/01/10) - deserve-more-respect-a-jedis-weapon-must
|
||||||
|
|
|
@ -337,65 +337,76 @@ class DateDetector(object):
|
||||||
# if no templates specified - default templates should be used:
|
# if no templates specified - default templates should be used:
|
||||||
if not len(self.__templates):
|
if not len(self.__templates):
|
||||||
self.addDefaultTemplate()
|
self.addDefaultTemplate()
|
||||||
logSys.log(logLevel-1, "try to match time for line: %.120s", line)
|
log = logSys.log if logSys.getEffectiveLevel() <= logLevel else lambda *args: None
|
||||||
match = None
|
log(logLevel-1, "try to match time for line: %.120s", line)
|
||||||
|
|
||||||
# first try to use last template with same start/end position:
|
# first try to use last template with same start/end position:
|
||||||
|
match = None
|
||||||
|
found = None, 0x7fffffff, 0x7fffffff, -1
|
||||||
ignoreBySearch = 0x7fffffff
|
ignoreBySearch = 0x7fffffff
|
||||||
i = self.__lastTemplIdx
|
i = self.__lastTemplIdx
|
||||||
if i < len(self.__templates):
|
if i < len(self.__templates):
|
||||||
ddtempl = self.__templates[i]
|
ddtempl = self.__templates[i]
|
||||||
template = ddtempl.template
|
template = ddtempl.template
|
||||||
if template.flags & (DateTemplate.LINE_BEGIN|DateTemplate.LINE_END):
|
if template.flags & (DateTemplate.LINE_BEGIN|DateTemplate.LINE_END):
|
||||||
if logSys.getEffectiveLevel() <= logLevel-1: # pragma: no cover - very-heavy debug
|
log(logLevel-1, " try to match last anchored template #%02i ...", i)
|
||||||
logSys.log(logLevel-1, " try to match last anchored template #%02i ...", i)
|
|
||||||
match = template.matchDate(line)
|
match = template.matchDate(line)
|
||||||
ignoreBySearch = i
|
ignoreBySearch = i
|
||||||
else:
|
else:
|
||||||
distance, endpos = self.__lastPos[0], self.__lastEndPos[0]
|
distance, endpos = self.__lastPos[0], self.__lastEndPos[0]
|
||||||
if logSys.getEffectiveLevel() <= logLevel-1:
|
log(logLevel-1, " try to match last template #%02i (from %r to %r): ...%r==%r %s %r==%r...",
|
||||||
logSys.log(logLevel-1, " try to match last template #%02i (from %r to %r): ...%r==%r %s %r==%r...",
|
i, distance, endpos,
|
||||||
i, distance, endpos,
|
line[distance-1:distance], self.__lastPos[1],
|
||||||
line[distance-1:distance], self.__lastPos[1],
|
line[distance:endpos],
|
||||||
line[distance:endpos],
|
line[endpos:endpos+1], self.__lastEndPos[2])
|
||||||
line[endpos:endpos+1], self.__lastEndPos[1])
|
# check same boundaries left/right, outside fully equal, inside only if not alnum (e. g. bound RE
|
||||||
# check same boundaries left/right, otherwise possible collision/pattern switch:
|
# with space or some special char), otherwise possible collision/pattern switch:
|
||||||
if (line[distance-1:distance] == self.__lastPos[1] and
|
if ((
|
||||||
line[endpos:endpos+1] == self.__lastEndPos[1]
|
line[distance-1:distance] == self.__lastPos[1] or
|
||||||
):
|
(line[distance] == self.__lastPos[2] and not self.__lastPos[2].isalnum())
|
||||||
|
) and (
|
||||||
|
line[endpos:endpos+1] == self.__lastEndPos[2] or
|
||||||
|
(line[endpos-1] == self.__lastEndPos[1] and not self.__lastEndPos[1].isalnum())
|
||||||
|
)):
|
||||||
|
# search in line part only:
|
||||||
|
log(logLevel-1, " boundaries are correct, search in part %r", line[distance:endpos])
|
||||||
match = template.matchDate(line, distance, endpos)
|
match = template.matchDate(line, distance, endpos)
|
||||||
|
else:
|
||||||
|
log(logLevel-1, " boundaries show conflict, try whole search")
|
||||||
|
match = template.matchDate(line)
|
||||||
|
ignoreBySearch = i
|
||||||
if match:
|
if match:
|
||||||
distance = match.start()
|
distance = match.start()
|
||||||
endpos = match.end()
|
endpos = match.end()
|
||||||
# if different position, possible collision/pattern switch:
|
# if different position, possible collision/pattern switch:
|
||||||
if (
|
if (
|
||||||
|
len(self.__templates) == 1 or # single template:
|
||||||
template.flags & (DateTemplate.LINE_BEGIN|DateTemplate.LINE_END) or
|
template.flags & (DateTemplate.LINE_BEGIN|DateTemplate.LINE_END) or
|
||||||
(distance == self.__lastPos[0] and endpos == self.__lastEndPos[0])
|
(distance == self.__lastPos[0] and endpos == self.__lastEndPos[0])
|
||||||
):
|
):
|
||||||
logSys.log(logLevel, " matched last time template #%02i", i)
|
log(logLevel, " matched last time template #%02i", i)
|
||||||
else:
|
else:
|
||||||
logSys.log(logLevel, " ** last pattern collision - pattern change, search ...")
|
log(logLevel, " ** last pattern collision - pattern change, reserve & search ...")
|
||||||
|
found = match, distance, endpos, i; # save current best alternative
|
||||||
match = None
|
match = None
|
||||||
else:
|
else:
|
||||||
logSys.log(logLevel, " ** last pattern not found - pattern change, search ...")
|
log(logLevel, " ** last pattern not found - pattern change, search ...")
|
||||||
# search template and better match:
|
# search template and better match:
|
||||||
if not match:
|
if not match:
|
||||||
logSys.log(logLevel, " search template (%i) ...", len(self.__templates))
|
log(logLevel, " search template (%i) ...", len(self.__templates))
|
||||||
found = None, 0x7fffffff, 0x7fffffff, -1
|
|
||||||
i = 0
|
i = 0
|
||||||
for ddtempl in self.__templates:
|
for ddtempl in self.__templates:
|
||||||
if logSys.getEffectiveLevel() <= logLevel-1:
|
|
||||||
logSys.log(logLevel-1, " try template #%02i: %s", i, ddtempl.name)
|
|
||||||
if i == ignoreBySearch:
|
if i == ignoreBySearch:
|
||||||
i += 1
|
i += 1
|
||||||
continue
|
continue
|
||||||
|
log(logLevel-1, " try template #%02i: %s", i, ddtempl.name)
|
||||||
template = ddtempl.template
|
template = ddtempl.template
|
||||||
match = template.matchDate(line)
|
match = template.matchDate(line)
|
||||||
if match:
|
if match:
|
||||||
distance = match.start()
|
distance = match.start()
|
||||||
endpos = match.end()
|
endpos = match.end()
|
||||||
if logSys.getEffectiveLevel() <= logLevel:
|
log(logLevel, " matched time template #%02i (at %r <= %r, %r) %s",
|
||||||
logSys.log(logLevel, " matched time template #%02i (at %r <= %r, %r) %s",
|
i, distance, ddtempl.distance, self.__lastPos[0], template.name)
|
||||||
i, distance, ddtempl.distance, self.__lastPos[0], template.name)
|
|
||||||
## last (or single) template - fast stop:
|
## last (or single) template - fast stop:
|
||||||
if i+1 >= len(self.__templates):
|
if i+1 >= len(self.__templates):
|
||||||
break
|
break
|
||||||
|
@ -408,7 +419,7 @@ class DateDetector(object):
|
||||||
## [grave] if distance changed, possible date-match was found somewhere
|
## [grave] if distance changed, possible date-match was found somewhere
|
||||||
## in body of message, so save this template, and search further:
|
## in body of message, so save this template, and search further:
|
||||||
if distance > ddtempl.distance or distance > self.__lastPos[0]:
|
if distance > ddtempl.distance or distance > self.__lastPos[0]:
|
||||||
logSys.log(logLevel, " ** distance collision - pattern change, reserve")
|
log(logLevel, " ** distance collision - pattern change, reserve")
|
||||||
## shortest of both:
|
## shortest of both:
|
||||||
if distance < found[1]:
|
if distance < found[1]:
|
||||||
found = match, distance, endpos, i
|
found = match, distance, endpos, i
|
||||||
|
@ -422,7 +433,7 @@ class DateDetector(object):
|
||||||
# check other template was found (use this one with shortest distance):
|
# check other template was found (use this one with shortest distance):
|
||||||
if not match and found[0]:
|
if not match and found[0]:
|
||||||
match, distance, endpos, i = found
|
match, distance, endpos, i = found
|
||||||
logSys.log(logLevel, " use best time template #%02i", i)
|
log(logLevel, " use best time template #%02i", i)
|
||||||
ddtempl = self.__templates[i]
|
ddtempl = self.__templates[i]
|
||||||
template = ddtempl.template
|
template = ddtempl.template
|
||||||
# we've winner, incr hits, set distance, usage, reorder, etc:
|
# we've winner, incr hits, set distance, usage, reorder, etc:
|
||||||
|
@ -432,8 +443,8 @@ class DateDetector(object):
|
||||||
ddtempl.distance = distance
|
ddtempl.distance = distance
|
||||||
if self.__firstUnused == i:
|
if self.__firstUnused == i:
|
||||||
self.__firstUnused += 1
|
self.__firstUnused += 1
|
||||||
self.__lastPos = distance, line[distance-1:distance]
|
self.__lastPos = distance, line[distance-1:distance], line[distance]
|
||||||
self.__lastEndPos = endpos, line[endpos:endpos+1]
|
self.__lastEndPos = endpos, line[endpos-1], line[endpos:endpos+1]
|
||||||
# if not first - try to reorder current template (bubble up), they will be not sorted anymore:
|
# if not first - try to reorder current template (bubble up), they will be not sorted anymore:
|
||||||
if i and i != self.__lastTemplIdx:
|
if i and i != self.__lastTemplIdx:
|
||||||
i = self._reorderTemplate(i)
|
i = self._reorderTemplate(i)
|
||||||
|
@ -442,7 +453,7 @@ class DateDetector(object):
|
||||||
return (match, template)
|
return (match, template)
|
||||||
|
|
||||||
# not found:
|
# not found:
|
||||||
logSys.log(logLevel, " no template.")
|
log(logLevel, " no template.")
|
||||||
return (None, None)
|
return (None, None)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
|
@ -36,15 +36,16 @@ logSys = getLogger(__name__)
|
||||||
RE_GROUPED = re.compile(r'(?<!(?:\(\?))(?<!\\)\((?!\?)')
|
RE_GROUPED = re.compile(r'(?<!(?:\(\?))(?<!\\)\((?!\?)')
|
||||||
RE_GROUP = ( re.compile(r'^((?:\(\?\w+\))?\^?(?:\(\?\w+\))?)(.*?)(\$?)$'), r"\1(\2)\3" )
|
RE_GROUP = ( re.compile(r'^((?:\(\?\w+\))?\^?(?:\(\?\w+\))?)(.*?)(\$?)$'), r"\1(\2)\3" )
|
||||||
|
|
||||||
|
RE_EXLINE_NO_BOUNDS = re.compile(r'^\{UNB\}')
|
||||||
RE_EXLINE_BOUND_BEG = re.compile(r'^\{\^LN-BEG\}')
|
RE_EXLINE_BOUND_BEG = re.compile(r'^\{\^LN-BEG\}')
|
||||||
RE_EXSANC_BOUND_BEG = re.compile(r'^\(\?:\^\|\\b\|\\W\)')
|
RE_EXSANC_BOUND_BEG = re.compile(r'^\((?:\?:)?\^\|\\b\|\\W\)')
|
||||||
RE_EXEANC_BOUND_BEG = re.compile(r'\(\?=\\b\|\\W\|\$\)$')
|
RE_EXEANC_BOUND_BEG = re.compile(r'\(\?=\\b\|\\W\|\$\)$')
|
||||||
RE_NO_WRD_BOUND_BEG = re.compile(r'^\(*(?:\(\?\w+\))?(?:\^|\(*\*\*|\(\?:\^)')
|
RE_NO_WRD_BOUND_BEG = re.compile(r'^\(*(?:\(\?\w+\))?(?:\^|\(*\*\*|\((?:\?:)?\^)')
|
||||||
RE_NO_WRD_BOUND_END = re.compile(r'(?<!\\)(?:\$\)?|\\b|\\s|\*\*\)*)$')
|
RE_NO_WRD_BOUND_END = re.compile(r'(?<!\\)(?:\$\)?|\\b|\\s|\*\*\)*)$')
|
||||||
RE_DEL_WRD_BOUNDS = ( re.compile(r'^\(*(?:\(\?\w+\))?\(*\*\*|(?<!\\)\*\*\)*$'),
|
RE_DEL_WRD_BOUNDS = ( re.compile(r'^\(*(?:\(\?\w+\))?\(*\*\*|(?<!\\)\*\*\)*$'),
|
||||||
lambda m: m.group().replace('**', '') )
|
lambda m: m.group().replace('**', '') )
|
||||||
|
|
||||||
RE_LINE_BOUND_BEG = re.compile(r'^(?:\(\?\w+\))?(?:\^|\(\?:\^(?!\|))')
|
RE_LINE_BOUND_BEG = re.compile(r'^(?:\(\?\w+\))?(?:\^|\((?:\?:)?\^(?!\|))')
|
||||||
RE_LINE_BOUND_END = re.compile(r'(?<![\\\|])(?:\$\)?)$')
|
RE_LINE_BOUND_END = re.compile(r'(?<![\\\|])(?:\$\)?)$')
|
||||||
|
|
||||||
RE_ALPHA_PATTERN = re.compile(r'(?<!\%)\%[aAbBpc]')
|
RE_ALPHA_PATTERN = re.compile(r'(?<!\%)\%[aAbBpc]')
|
||||||
|
@ -119,7 +120,7 @@ class DateTemplate(object):
|
||||||
if boundBegin:
|
if boundBegin:
|
||||||
self.flags |= DateTemplate.WORD_BEGIN if wordBegin != 'start' else DateTemplate.LINE_BEGIN
|
self.flags |= DateTemplate.WORD_BEGIN if wordBegin != 'start' else DateTemplate.LINE_BEGIN
|
||||||
if wordBegin != 'start':
|
if wordBegin != 'start':
|
||||||
regex = r'(?:^|\b|\W)' + regex
|
regex = r'(?=^|\b|\W)' + regex
|
||||||
else:
|
else:
|
||||||
regex = r"^(?:\W{0,2})?" + regex
|
regex = r"^(?:\W{0,2})?" + regex
|
||||||
if not self.name.startswith('{^LN-BEG}'):
|
if not self.name.startswith('{^LN-BEG}'):
|
||||||
|
@ -128,8 +129,10 @@ class DateTemplate(object):
|
||||||
if boundEnd:
|
if boundEnd:
|
||||||
self.flags |= DateTemplate.WORD_END
|
self.flags |= DateTemplate.WORD_END
|
||||||
regex += r'(?=\b|\W|$)'
|
regex += r'(?=\b|\W|$)'
|
||||||
if RE_LINE_BOUND_BEG.search(regex): self.flags |= DateTemplate.LINE_BEGIN
|
if not (self.flags & DateTemplate.LINE_BEGIN) and RE_LINE_BOUND_BEG.search(regex):
|
||||||
if RE_LINE_BOUND_END.search(regex): self.flags |= DateTemplate.LINE_END
|
self.flags |= DateTemplate.LINE_BEGIN
|
||||||
|
if not (self.flags & DateTemplate.LINE_END) and RE_LINE_BOUND_END.search(regex):
|
||||||
|
self.flags |= DateTemplate.LINE_END
|
||||||
# remove possible special pattern "**" in front and end of regex:
|
# remove possible special pattern "**" in front and end of regex:
|
||||||
regex = RE_DEL_WRD_BOUNDS[0].sub(RE_DEL_WRD_BOUNDS[1], regex)
|
regex = RE_DEL_WRD_BOUNDS[0].sub(RE_DEL_WRD_BOUNDS[1], regex)
|
||||||
self._regex = regex
|
self._regex = regex
|
||||||
|
@ -188,7 +191,7 @@ class DateTemplate(object):
|
||||||
def unboundPattern(pattern):
|
def unboundPattern(pattern):
|
||||||
return RE_EXEANC_BOUND_BEG.sub('',
|
return RE_EXEANC_BOUND_BEG.sub('',
|
||||||
RE_EXSANC_BOUND_BEG.sub('',
|
RE_EXSANC_BOUND_BEG.sub('',
|
||||||
RE_EXLINE_BOUND_BEG.sub('', pattern)
|
RE_EXLINE_BOUND_BEG.sub('', RE_EXLINE_NO_BOUNDS.sub('', pattern))
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -297,6 +300,10 @@ class DatePatternRegex(DateTemplate):
|
||||||
def setRegex(self, pattern, wordBegin=True, wordEnd=True):
|
def setRegex(self, pattern, wordBegin=True, wordEnd=True):
|
||||||
# original pattern:
|
# original pattern:
|
||||||
self._pattern = pattern
|
self._pattern = pattern
|
||||||
|
# if unbound signalled - reset boundaries left and right:
|
||||||
|
if RE_EXLINE_NO_BOUNDS.search(pattern):
|
||||||
|
pattern = RE_EXLINE_NO_BOUNDS.sub('', pattern)
|
||||||
|
wordBegin = wordEnd = False
|
||||||
# if explicit given {^LN-BEG} - remove it from pattern and set 'start' in wordBegin:
|
# if explicit given {^LN-BEG} - remove it from pattern and set 'start' in wordBegin:
|
||||||
if wordBegin and RE_EXLINE_BOUND_BEG.search(pattern):
|
if wordBegin and RE_EXLINE_BOUND_BEG.search(pattern):
|
||||||
pattern = RE_EXLINE_BOUND_BEG.sub('', pattern)
|
pattern = RE_EXLINE_BOUND_BEG.sub('', pattern)
|
||||||
|
|
|
@ -330,6 +330,27 @@ class DateDetectorTest(LogCaptureTestCase):
|
||||||
dt = '2005 Jun 03'; self.assertEqual(t.matchDate(dt).group(1), dt)
|
dt = '2005 Jun 03'; self.assertEqual(t.matchDate(dt).group(1), dt)
|
||||||
dt = '2005 JUN 03'; self.assertEqual(t.matchDate(dt).group(1), dt)
|
dt = '2005 JUN 03'; self.assertEqual(t.matchDate(dt).group(1), dt)
|
||||||
|
|
||||||
|
def testNotAnchoredCollision(self):
|
||||||
|
# try for patterns with and without word boundaries:
|
||||||
|
for dp in (r'%H:%M:%S', r'{UNB}%H:%M:%S'):
|
||||||
|
dd = DateDetector()
|
||||||
|
dd.appendTemplate(dp)
|
||||||
|
# boundary of timestamp changes right and left (and time is left and right in line):
|
||||||
|
for fmt in ('%s test', '%8s test', 'test %s', 'test %8s'):
|
||||||
|
for dt in (
|
||||||
|
'00:01:02',
|
||||||
|
'00:01:2',
|
||||||
|
'00:1:2',
|
||||||
|
'0:1:2',
|
||||||
|
'00:1:2',
|
||||||
|
'00:01:2',
|
||||||
|
'00:01:02',
|
||||||
|
'0:1:2',
|
||||||
|
'00:01:02',
|
||||||
|
):
|
||||||
|
t = dd.getTime(fmt % dt)
|
||||||
|
self.assertEqual((t[0], t[1].group()), (1123970462.0, dt))
|
||||||
|
|
||||||
def testAmbiguousInOrderedTemplates(self):
|
def testAmbiguousInOrderedTemplates(self):
|
||||||
dd = self.datedetector
|
dd = self.datedetector
|
||||||
for (debit, line, cnt) in (
|
for (debit, line, cnt) in (
|
||||||
|
|
Loading…
Reference in New Issue