Only remember log lines we need to print later.

When testing the processing of a large logfile (430 MB, 2M lines) without
--print-all-missed or --print-all-ignored, fail2ban-regex consumes
just over 2 GB of RAM before completing.  Even though it does not need
to retain any of the missed or ignored log lines, it does anyway.

With this patch, memory use never grows beyond about 11 MB (unless
--print-all-* are enabled).
pull/644/head
Hank Leininger 2014-03-13 23:18:08 -04:00
parent 27dafea281
commit aa7af6f9c2
Failed to extract signature
1 changed files with 10 additions and 12 deletions

View File

@ -156,22 +156,16 @@ class LineStats(object):
"""
def __init__(self):
self.tested = self.matched = 0
self.missed = 0
self.missed_lines = []
self.missed_lines_timeextracted = []
self.ignored = 0
self.ignored_lines = []
self.ignored_lines_timeextracted = []
def __str__(self):
return "%(tested)d lines, %(ignored)d ignored, %(matched)d matched, %(missed)d missed" % self
@property
def ignored(self):
return len(self.ignored_lines)
@property
def missed(self):
return self.tested - (self.ignored + self.matched)
# just for convenient str
def __getitem__(self, key):
return getattr(self, key)
@ -267,16 +261,20 @@ class Fail2banRegex(object):
line_datetimestripped, ret = fail2banRegex.testRegex(line)
if is_ignored:
self._line_stats.ignored_lines.append(line)
self._line_stats.ignored_lines_timeextracted.append(line_datetimestripped)
self._line_stats.ignored += 1
if self._print_all_ignored:
self._line_stats.ignored_lines.append(line)
self._line_stats.ignored_lines_timeextracted.append(line_datetimestripped)
if len(ret) > 0:
assert(not is_ignored)
self._line_stats.matched += 1
else:
if not is_ignored:
self._line_stats.missed_lines.append(line)
self._line_stats.missed_lines_timeextracted.append(line_datetimestripped)
self._line_stats.missed += 1
if self._print_all_missed:
self._line_stats.missed_lines.append(line)
self._line_stats.missed_lines_timeextracted.append(line_datetimestripped)
self._line_stats.tested += 1
if line_no % 10 == 0: