From aa7af6f9c2b2feae17be7aafd5f3290c84494f8b Mon Sep 17 00:00:00 2001 From: Hank Leininger Date: Thu, 13 Mar 2014 23:18:08 -0400 Subject: [PATCH] Only remember log lines we need to print later. When testing processing a large logfile (430 MB, 2M lines) without --print-all-missed or --print-all-ignored, fail2ban-regex consumes just over 2 GB of RAM before completing. Even though it does not need to retain any of the missed or ignored log lines, it does anyway. With this patch, memory use never grows beyond about 11 MB (unless --print-all-* are enabled). --- fail2ban-regex | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/fail2ban-regex b/fail2ban-regex index 22fbbd67..941d0074 100755 --- a/fail2ban-regex +++ b/fail2ban-regex @@ -156,22 +156,16 @@ class LineStats(object): """ def __init__(self): self.tested = self.matched = 0 + self.missed = 0 self.missed_lines = [] self.missed_lines_timeextracted = [] + self.ignored = 0 self.ignored_lines = [] self.ignored_lines_timeextracted = [] def __str__(self): return "%(tested)d lines, %(ignored)d ignored, %(matched)d matched, %(missed)d missed" % self - @property - def ignored(self): - return len(self.ignored_lines) - - @property - def missed(self): - return self.tested - (self.ignored + self.matched) - # just for convenient str def __getitem__(self, key): return getattr(self, key) @@ -267,16 +261,20 @@ class Fail2banRegex(object): line_datetimestripped, ret = fail2banRegex.testRegex(line) if is_ignored: - self._line_stats.ignored_lines.append(line) - self._line_stats.ignored_lines_timeextracted.append(line_datetimestripped) + self._line_stats.ignored += 1 + if self._print_all_ignored: + self._line_stats.ignored_lines.append(line) + self._line_stats.ignored_lines_timeextracted.append(line_datetimestripped) if len(ret) > 0: assert(not is_ignored) self._line_stats.matched += 1 else: if not is_ignored: - self._line_stats.missed_lines.append(line) - 
self._line_stats.missed_lines_timeextracted.append(line_datetimestripped) + self._line_stats.missed += 1 + if self._print_all_missed: + self._line_stats.missed_lines.append(line) + self._line_stats.missed_lines_timeextracted.append(line_datetimestripped) self._line_stats.tested += 1 if line_no % 10 == 0: