Only remember log lines we need to print later.

When testing the processing of a large logfile (430 MB, 2M lines) without
--print-all-missed or --print-all-ignored, fail2ban-regex consumes
just over 2 GB of RAM before completing.  Even though it does not need
to retain any of the missed or ignored log lines, it does anyway.

With this patch, memory use never grows beyond about 11 MB (unless
--print-all-* are enabled).
This commit is contained in:
Hank Leininger
2014-03-13 23:18:08 -04:00
parent 27dafea281
commit aa7af6f9c2

View File

@@ -156,22 +156,16 @@ class LineStats(object):
"""
def __init__(self):
self.tested = self.matched = 0
self.missed = 0
self.missed_lines = []
self.missed_lines_timeextracted = []
self.ignored = 0
self.ignored_lines = []
self.ignored_lines_timeextracted = []
def __str__(self):
return "%(tested)d lines, %(ignored)d ignored, %(matched)d matched, %(missed)d missed" % self
@property
def ignored(self):
return len(self.ignored_lines)
@property
def missed(self):
return self.tested - (self.ignored + self.matched)
# just for convenient str
def __getitem__(self, key):
return getattr(self, key)
@@ -267,16 +261,20 @@ class Fail2banRegex(object):
line_datetimestripped, ret = fail2banRegex.testRegex(line)
if is_ignored:
self._line_stats.ignored_lines.append(line)
self._line_stats.ignored_lines_timeextracted.append(line_datetimestripped)
self._line_stats.ignored += 1
if self._print_all_ignored:
self._line_stats.ignored_lines.append(line)
self._line_stats.ignored_lines_timeextracted.append(line_datetimestripped)
if len(ret) > 0:
assert(not is_ignored)
self._line_stats.matched += 1
else:
if not is_ignored:
self._line_stats.missed_lines.append(line)
self._line_stats.missed_lines_timeextracted.append(line_datetimestripped)
self._line_stats.missed += 1
if self._print_all_missed:
self._line_stats.missed_lines.append(line)
self._line_stats.missed_lines_timeextracted.append(line_datetimestripped)
self._line_stats.tested += 1
if line_no % 10 == 0: