Merge branch 'fix-rc-on-too-many-failures' into 0.10: resolves RC with uncontrolled growth of failure list (jail with too many matches that did not cause ban, gh-2945)

commit e3d43d1241 by sebres (pull/3117/head)

@@ -124,9 +124,10 @@ class FailManager:
         return len(self.__failList)
     def cleanup(self, time):
+        time -= self.__maxTime
         with self.__lock:
             todelete = [fid for fid,item in self.__failList.iteritems() \
-                if item.getTime() + self.__maxTime <= time]
+                if item.getTime() <= time]
             if len(todelete) == len(self.__failList):
                 # remove all:
                 self.__failList = dict()
@@ -140,7 +141,7 @@ class FailManager:
             else:
                 # create new dictionary without items to be deleted:
                 self.__failList = dict((fid,item) for fid,item in self.__failList.iteritems() \
-                    if item.getTime() + self.__maxTime > time)
+                    if item.getTime() > time)
         self.__bgSvc.service()
     def delFailure(self, fid):
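The failmanager.py hunks above change FailManager.cleanup() to shift the reference time once (`time -= self.__maxTime`) instead of adding `__maxTime` to every entry's timestamp inside the comprehensions. A minimal sketch of the equivalent pruning logic, assuming a plain dict mapping a failure id to its last-failure timestamp rather than the real FailManager/FailTicket API:

```python
def cleanup(fail_list, now, max_time):
    """Drop entries whose last failure is older than max_time seconds.

    Sketch only: fail_list is assumed to be {fid: last_failure_time}.
    """
    threshold = now - max_time  # computed once, same idea as 'time -= self.__maxTime'
    return {fid: t for fid, t in fail_list.items() if t > threshold}

# Example: with max_time=600, an entry from 700 s ago is dropped, one from 10 s ago is kept.
print(cleanup({"a": 1000.0 - 700, "b": 1000.0 - 10}, now=1000.0, max_time=600))  # {'b': 990.0}
```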

@@ -93,6 +93,8 @@ class Filter(JailThread):
         ## Store last time stamp, applicable for multi-line
         self.__lastTimeText = ""
         self.__lastDate = None
+        ## Next service (cleanup) time
+        self.__nextSvcTime = -(1<<63)
         ## if set, treat log lines without explicit time zone to be in this time zone
         self.__logtimezone = None
         ## Default or preferred encoding (to decode bytes from file or journal):
@@ -114,10 +116,10 @@ class Filter(JailThread):
         self.checkFindTime = True
         ## shows that filter is in operation mode (processing new messages):
         self.inOperation = True
-        ## if true prevents against retarded banning in case of RC by too many failures (disabled only for test purposes):
-        self.banASAP = True
         ## Ticks counter
         self.ticks = 0
+        ## Processed lines counter
+        self.procLines = 0
         ## Thread name:
         self.name="f2b/f."+self.jailName
@@ -441,12 +443,23 @@ class Filter(JailThread):
     def performBan(self, ip=None):
         """Performs a ban for IPs (or given ip) that are reached maxretry of the jail."""
-        try: # pragma: no branch - exception is the only way out
-            while True:
+        while True:
+            try:
                 ticket = self.failManager.toBan(ip)
-                self.jail.putFailTicket(ticket)
-        except FailManagerEmpty:
-            self.failManager.cleanup(MyTime.time())
+            except FailManagerEmpty:
+                break
+            self.jail.putFailTicket(ticket)
+            if ip: break
+        self.performSvc()
+    def performSvc(self, force=False):
+        """Performs a service tasks (clean failure list)."""
+        tm = MyTime.time()
+        # avoid too early clean up:
+        if force or tm >= self.__nextSvcTime:
+            self.__nextSvcTime = tm + 5
+            # clean up failure list:
+            self.failManager.cleanup(tm)
     def addAttempt(self, ip, *matches):
         """Generate a failed attempt for ip"""
@@ -694,8 +707,12 @@ class Filter(JailThread):
             attempts = self.failManager.addFailure(tick)
             # avoid RC on busy filter (too many failures) - if attempts for IP/ID reached maxretry,
             # we can speedup ban, so do it as soon as possible:
-            if self.banASAP and attempts >= self.failManager.getMaxRetry():
+            if attempts >= self.failManager.getMaxRetry():
                 self.performBan(ip)
+            self.procLines += 1
+            # every 100 lines check need to perform service tasks:
+            if self.procLines % 100 == 0:
+                self.performSvc()
             # reset (halve) error counter (successfully processed line):
             if self._errors:
                 self._errors //= 2
@@ -1064,6 +1081,7 @@ class FileFilter(Filter):
     # is created and is added to the FailManager.
     def getFailures(self, filename, inOperation=None):
+        if self.idle: return False
         log = self.getLog(filename)
         if log is None:
             logSys.error("Unable to get failures in %s", filename)
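The new performSvc() above throttles the failure-list cleanup: it may be requested after every ban, every 100 processed lines, or (in the hunks below) every 10 monitor ticks, but the actual FailManager.cleanup() runs at most once per 5-second window, tracked in __nextSvcTime (initialized to -(1<<63) so the first request always fires). A standalone sketch of that throttle, assuming a generic cleanup callback rather than fail2ban's Filter/FailManager classes:

```python
import time

class ThrottledService:
    """Sketch of the performSvc() idea: frequent requests, rare real work."""

    def __init__(self, cleanup, interval=5.0):
        self._cleanup = cleanup          # e.g. a failManager.cleanup-like callable
        self._interval = interval        # minimum seconds between real cleanups
        self._next = float("-inf")       # analogous to __nextSvcTime = -(1<<63)

    def service(self, force=False):
        now = time.time()
        if force or now >= self._next:   # called too early? then skip silently
            self._next = now + self._interval
            self._cleanup(now)

# Usage sketch: call service() as often as you like; cleanup runs at most every 5 s.
svc = ThrottledService(lambda now: print("cleanup at", now))
for _ in range(3):
    svc.service()                        # only the first call triggers the callback here
```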

@@ -55,7 +55,6 @@ class FilterGamin(FileFilter):
     def __init__(self, jail):
         FileFilter.__init__(self, jail)
-        self.__modified = False
         # Gamin monitor
         self.monitor = gamin.WatchMonitor()
         fd = self.monitor.get_fd()
@@ -67,21 +66,9 @@ class FilterGamin(FileFilter):
         logSys.log(4, "Got event: " + repr(event) + " for " + path)
         if event in (gamin.GAMCreated, gamin.GAMChanged, gamin.GAMExists):
             logSys.debug("File changed: " + path)
-            self.__modified = True
         self.ticks += 1
-        self._process_file(path)
-    def _process_file(self, path):
-        """Process a given file
-        TODO -- RF:
-        this is a common logic and must be shared/provided by FileFilter
-        """
         self.getFailures(path)
-        if not self.banASAP: # pragma: no cover
-            self.performBan()
-        self.__modified = False
     ##
     # Add a log file path
@@ -128,6 +115,9 @@ class FilterGamin(FileFilter):
             Utils.wait_for(lambda: not self.active or self._handleEvents(),
                 self.sleeptime)
             self.ticks += 1
+            if self.ticks % 10 == 0:
+                self.performSvc()
         logSys.debug("[%s] filter terminated", self.jailName)
         return True

@@ -27,9 +27,7 @@ __license__ = "GPL"
 import os
 import time
-from .failmanager import FailManagerEmpty
 from .filter import FileFilter
-from .mytime import MyTime
 from .utils import Utils
 from ..helpers import getLogger, logging
@@ -55,7 +53,6 @@ class FilterPoll(FileFilter):
     def __init__(self, jail):
         FileFilter.__init__(self, jail)
-        self.__modified = False
         ## The time of the last modification of the file.
         self.__prevStats = dict()
         self.__file404Cnt = dict()
@@ -115,13 +112,10 @@ class FilterPoll(FileFilter):
                     break
                 for filename in modlst:
                     self.getFailures(filename)
-                    self.__modified = True
                 self.ticks += 1
-                if self.__modified:
-                    if not self.banASAP: # pragma: no cover
-                        self.performBan()
-                    self.__modified = False
+                if self.ticks % 10 == 0:
+                    self.performSvc()
             except Exception as e: # pragma: no cover
                 if not self.active: # if not active - error by stop...
                     break

@@ -75,7 +75,6 @@ class FilterPyinotify(FileFilter):
     def __init__(self, jail):
         FileFilter.__init__(self, jail)
-        self.__modified = False
         # Pyinotify watch manager
         self.__monitor = pyinotify.WatchManager()
         self.__notifier = None
@@ -140,9 +139,6 @@ class FilterPyinotify(FileFilter):
         """
         if not self.idle:
             self.getFailures(path)
-            if not self.banASAP: # pragma: no cover
-                self.performBan()
-        self.__modified = False
     def _addPending(self, path, reason, isDir=False):
         if path not in self.__pending:
@@ -352,9 +348,14 @@ class FilterPyinotify(FileFilter):
                     if not self.active: break
                     self.__notifier.read_events()
+                self.ticks += 1
                 # check pending files/dirs (logrotate ready):
-                if not self.idle:
-                    self._checkPending()
+                if self.idle:
+                    continue
+                self._checkPending()
+                if self.ticks % 10 == 0:
+                    self.performSvc()
             except Exception as e: # pragma: no cover
                 if not self.active: # if not active - error by stop...
@@ -364,8 +365,6 @@ class FilterPyinotify(FileFilter):
                 # incr common error counter:
                 self.commonError()
-            self.ticks += 1
         logSys.debug("[%s] filter exited (pyinotifier)", self.jailName)
         self.__notifier = None

@@ -322,13 +322,12 @@ class FilterSystemd(JournalFilter): # pragma: systemd no cover
                             break
                     else:
                         break
-                if self.__modified:
-                    if not self.banASAP: # pragma: no cover
-                        self.performBan()
-                    self.__modified = 0
-                    # update position in log (time and iso string):
-                    if self.jail.database is not None:
-                        self.jail.database.updateJournal(self.jail, 'systemd-journal', line[1], line[0][1])
+                self.__modified = 0
+                if self.ticks % 10 == 0:
+                    self.performSvc()
+                # update position in log (time and iso string):
+                if self.jail.database is not None:
+                    self.jail.database.updateJournal(self.jail, 'systemd-journal', line[1], line[0][1])
             except Exception as e: # pragma: no cover
                 if not self.active: # if not active - error by stop...
                     break
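All four monitor backends (gamin, poll, pyinotify, systemd) are changed the same way: the deferred `if not self.banASAP: self.performBan()` fallback disappears, because processLineAndAdd() now bans as soon as an IP reaches maxretry, and the loops instead call performSvc() every 10th tick. A condensed, hypothetical sketch of that shared loop shape — `monitor_loop`, `modified_files` and the attribute layout of `filter_` are illustrative, not fail2ban API:

```python
def monitor_loop(filter_):
    """Illustrative shape of the per-backend run() loops after this change."""
    while filter_.active:
        for path in filter_.modified_files():  # backend-specific event source (poll, inotify, ...)
            filter_.getFailures(path)          # bans happen inside, as soon as maxretry is hit
        filter_.ticks += 1
        if filter_.ticks % 10 == 0:            # piggyback the throttled cleanup on the ticks
            filter_.performSvc()
```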

@@ -164,18 +164,25 @@ def _assert_correct_last_attempt(utest, filter_, output, count=None):
             # get fail ticket from jail
             found.append(_ticket_tuple(filter_.getFailTicket()))
     else:
-        # when we are testing without jails
-        # wait for failures (up to max time)
-        Utils.wait_for(
-            lambda: filter_.failManager.getFailCount() >= (tickcount, failcount),
-            _maxWaitTime(10))
-        # get fail ticket(s) from filter
-        while tickcount:
-            try:
-                found.append(_ticket_tuple(filter_.failManager.toBan()))
-            except FailManagerEmpty:
-                break
-            tickcount -= 1
+        # when we are testing without jails wait for failures (up to max time)
+        if filter_.jail:
+            while True:
+                t = filter_.jail.getFailTicket()
+                if not t: break
+                found.append(_ticket_tuple(t))
+        if found:
+            tickcount -= len(found)
+        if tickcount > 0:
+            Utils.wait_for(
+                lambda: filter_.failManager.getFailCount() >= (tickcount, failcount),
+                _maxWaitTime(10))
+            # get fail ticket(s) from filter
+            while tickcount:
+                try:
+                    found.append(_ticket_tuple(filter_.failManager.toBan()))
+                except FailManagerEmpty:
+                    break
+                tickcount -= 1
     if not isinstance(output[0], (tuple,list)):
         utest.assertEqual(len(found), 1)
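The test helper _assert_correct_last_attempt has to follow suit: because bans now fire immediately, tickets may already sit in the dummy jail's queue, so the helper first drains the jail and only waits on the FailManager for whatever is still missing. A small sketch of that drain-then-wait order, assuming a jail object with a non-blocking getFailTicket() that returns a falsy value when empty (as the DummyJail used in these tests does):

```python
def collect_tickets(filter_, tickcount):
    """Sketch: gather up to tickcount tickets, preferring the jail queue."""
    found = []
    if filter_.jail:
        while True:
            ticket = filter_.jail.getFailTicket()  # non-blocking; falsy when queue is empty
            if not ticket:
                break
            found.append(ticket)
    remaining = tickcount - len(found)
    # only if something is still missing, fall back to the filter's FailManager
    for _ in range(max(remaining, 0)):
        try:
            found.append(filter_.failManager.toBan())
        except Exception:  # stand-in for FailManagerEmpty in the real tests
            break
    return found
```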
@@ -800,7 +807,6 @@ class LogFileMonitor(LogCaptureTestCase):
         _, self.name = tempfile.mkstemp('fail2ban', 'monitorfailures')
         self.file = open(self.name, 'a')
         self.filter = FilterPoll(DummyJail())
-        self.filter.banASAP = False # avoid immediate ban in this tests
         self.filter.addLogPath(self.name, autoSeek=False)
         self.filter.active = True
         self.filter.addFailRegex(r"(?:(?:Authentication failure|Failed [-/\w+]+) for(?: [iI](?:llegal|nvalid) user)?|[Ii](?:llegal|nvalid) user|ROOT LOGIN REFUSED) .*(?: from|FROM) <HOST>")
@@ -960,7 +966,7 @@ class LogFileMonitor(LogCaptureTestCase):
         os.rename(self.name, self.name + '.bak')
         _copy_lines_between_files(GetFailures.FILENAME_01, self.name, skip=14, n=1).close()
         self.filter.getFailures(self.name)
-        _assert_correct_last_attempt(self, self.filter, GetFailures.FAILURES_01)
+        #_assert_correct_last_attempt(self, self.filter, GetFailures.FAILURES_01)
         self.assertEqual(self.filter.failManager.getFailTotal(), 3)
@@ -1018,7 +1024,6 @@ def get_monitor_failures_testcase(Filter_):
             self.file = open(self.name, 'a')
             self.jail = DummyJail()
             self.filter = Filter_(self.jail)
-            self.filter.banASAP = False # avoid immediate ban in this tests
             self.filter.addLogPath(self.name, autoSeek=False)
             # speedup search using exact date pattern:
             self.filter.setDatePattern(r'^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?')
@@ -1111,12 +1116,13 @@ def get_monitor_failures_testcase(Filter_):
                 skip=12, n=3, mode='w')
             self.assert_correct_last_attempt(GetFailures.FAILURES_01)
-        def _wait4failures(self, count=2):
+        def _wait4failures(self, count=2, waitEmpty=True):
             # Poll might need more time
-            self.assertTrue(self.isEmpty(_maxWaitTime(5)),
-                            "Queue must be empty but it is not: %s."
-                            % (', '.join([str(x) for x in self.jail.queue])))
-            self.assertRaises(FailManagerEmpty, self.filter.failManager.toBan)
+            if waitEmpty:
+                self.assertTrue(self.isEmpty(_maxWaitTime(5)),
+                                "Queue must be empty but it is not: %s."
+                                % (', '.join([str(x) for x in self.jail.queue])))
+                self.assertRaises(FailManagerEmpty, self.filter.failManager.toBan)
             Utils.wait_for(lambda: self.filter.failManager.getFailTotal() >= count, _maxWaitTime(10))
             self.assertEqual(self.filter.failManager.getFailTotal(), count)
@@ -1277,14 +1283,14 @@ def get_monitor_failures_testcase(Filter_):
             # tail written before, so let's not copy anything yet
             #_copy_lines_between_files(GetFailures.FILENAME_01, self.name, n=100)
             # we should detect the failures
-            self.assert_correct_last_attempt(GetFailures.FAILURES_01, count=6) # was needed if we write twice above
+            self.assert_correct_last_attempt(GetFailures.FAILURES_01, count=3) # was needed if we write twice above
             # now copy and get even more
             _copy_lines_between_files(GetFailures.FILENAME_01, self.file, skip=12, n=3)
             # check for 3 failures (not 9), because 6 already get above...
-            self.assert_correct_last_attempt(GetFailures.FAILURES_01)
+            self.assert_correct_last_attempt(GetFailures.FAILURES_01, count=3)
             # total count in this test:
-            self.assertEqual(self.filter.failManager.getFailTotal(), 12)
+            self._wait4failures(12, False)
cls = MonitorFailures
cls.__qualname__ = cls.__name__ = "MonitorFailures<%s>(%s)" \
@@ -1316,7 +1322,6 @@ def get_monitor_failures_journal_testcase(Filter_): # pragma: systemd no cover
         def _initFilter(self, **kwargs):
             self._getRuntimeJournal() # check journal available
             self.filter = Filter_(self.jail, **kwargs)
-            self.filter.banASAP = False # avoid immediate ban in this tests
             self.filter.addJournalMatch([
                 "SYSLOG_IDENTIFIER=fail2ban-testcases",
                 "TEST_FIELD=1",
@@ -1512,7 +1517,7 @@ def get_monitor_failures_journal_testcase(Filter_): # pragma: systemd no cover
                 "SYSLOG_IDENTIFIER=fail2ban-testcases",
                 "TEST_FIELD=1",
                 "TEST_UUID=%s" % self.test_uuid])
-            self.assert_correct_ban("193.168.0.128", 4)
+            self.assert_correct_ban("193.168.0.128", 3)
             _copy_lines_to_journal(
                 self.test_file, self.journal_fields, n=6, skip=10)
             # we should detect the failures
@@ -1526,7 +1531,7 @@ def get_monitor_failures_journal_testcase(Filter_): # pragma: systemd no cover
                 self.test_file, self.journal_fields, skip=15, n=4)
             self.waitForTicks(1)
             self.assertTrue(self.isFilled(10))
-            self.assert_correct_ban("87.142.124.10", 4)
+            self.assert_correct_ban("87.142.124.10", 3)
             # Add direct utf, unicode, blob:
             for l in (
                 "error: PAM: Authentication failure for \xe4\xf6\xfc\xdf from 192.0.2.1",
@@ -1570,7 +1575,6 @@ class GetFailures(LogCaptureTestCase):
         setUpMyTime()
         self.jail = DummyJail()
         self.filter = FileFilter(self.jail)
-        self.filter.banASAP = False # avoid immediate ban in this tests
         self.filter.active = True
         # speedup search using exact date pattern:
         self.filter.setDatePattern(r'^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?')
@@ -1641,6 +1645,7 @@ class GetFailures(LogCaptureTestCase):
             [u'Aug 14 11:%d:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:141.3.81.106 port 51332 ssh2'
                 % m for m in 53, 54, 57, 58])
+        self.filter.setMaxRetry(4)
         self.filter.addLogPath(GetFailures.FILENAME_02, autoSeek=0)
         self.filter.addFailRegex(r"Failed .* from <HOST>")
         self.filter.getFailures(GetFailures.FILENAME_02)
@@ -1649,6 +1654,7 @@ class GetFailures(LogCaptureTestCase):
     def testGetFailures03(self):
         output = ('203.162.223.135', 6, 1124013600.0)
+        self.filter.setMaxRetry(6)
         self.filter.addLogPath(GetFailures.FILENAME_03, autoSeek=0)
         self.filter.addFailRegex(r"error,relay=<HOST>,.*550 User unknown")
         self.filter.getFailures(GetFailures.FILENAME_03)
@@ -1657,6 +1663,7 @@ class GetFailures(LogCaptureTestCase):
     def testGetFailures03_InOperation(self):
         output = ('203.162.223.135', 9, 1124013600.0)
+        self.filter.setMaxRetry(9)
         self.filter.addLogPath(GetFailures.FILENAME_03, autoSeek=0)
         self.filter.addFailRegex(r"error,relay=<HOST>,.*550 User unknown")
         self.filter.getFailures(GetFailures.FILENAME_03, inOperation=True)
@@ -1674,7 +1681,7 @@ class GetFailures(LogCaptureTestCase):
     def testGetFailures03_Seek2(self):
         # same test as above but with seek to 'Aug 14 11:59:04' - so other output ...
         output = ('203.162.223.135', 2, 1124013600.0)
-        self.filter.setMaxRetry(1)
+        self.filter.setMaxRetry(2)
         self.filter.addLogPath(GetFailures.FILENAME_03, autoSeek=output[2])
         self.filter.addFailRegex(r"error,relay=<HOST>,.*550 User unknown")
@@ -1684,10 +1691,12 @@ class GetFailures(LogCaptureTestCase):
     def testGetFailures04(self):
         # because of not exact time in testcase04.log (no year), we should always use our test time:
         self.assertEqual(MyTime.time(), 1124013600)
-        # should find exact 4 failures for *.186 and 2 failures for *.185
-        output = (('212.41.96.186', 4, 1124013600.0),
-            ('212.41.96.185', 2, 1124013598.0))
+        # should find exact 4 failures for *.186 and 2 failures for *.185, but maxretry is 2, so 3 tickets:
+        output = (
+            ('212.41.96.186', 2, 1124013480.0),
+            ('212.41.96.186', 2, 1124013600.0),
+            ('212.41.96.185', 2, 1124013598.0)
+        )
         # speedup search using exact date pattern:
         self.filter.setDatePattern((r'^%ExY(?P<_sep>[-/.])%m(?P=_sep)%d[T ]%H:%M:%S(?:[.,]%f)?(?:\s*%z)?',
             r'^(?:%a )?%b %d %H:%M:%S(?:\.%f)?(?: %ExY)?',
@@ -1744,9 +1753,11 @@ class GetFailures(LogCaptureTestCase):
         unittest.F2B.SkipIfNoNetwork()
         # We should still catch failures with usedns = no ;-)
         output_yes = (
-            ('93.184.216.34', 2, 1124013539.0,
-                [u'Aug 14 11:54:59 i60p295 sshd[12365]: Failed publickey for roehl from example.com port 51332 ssh2',
-                 u'Aug 14 11:58:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:93.184.216.34 port 51332 ssh2']
+            ('93.184.216.34', 1, 1124013299.0,
+                [u'Aug 14 11:54:59 i60p295 sshd[12365]: Failed publickey for roehl from example.com port 51332 ssh2']
+            ),
+            ('93.184.216.34', 1, 1124013539.0,
+                [u'Aug 14 11:58:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:93.184.216.34 port 51332 ssh2']
+            ),
             ('2606:2800:220:1:248:1893:25c8:1946', 1, 1124013299.0,
                 [u'Aug 14 11:54:59 i60p295 sshd[12365]: Failed publickey for roehl from example.com port 51332 ssh2']
@@ -1771,7 +1782,6 @@ class GetFailures(LogCaptureTestCase):
             self.pruneLog("[test-phase useDns=%s]" % useDns)
             jail = DummyJail()
            filter_ = FileFilter(jail, useDns=useDns)
-            filter_.banASAP = False # avoid immediate ban in this tests
             filter_.active = True
             filter_.failManager.setMaxRetry(1) # we might have just few failures
@@ -1781,8 +1791,11 @@ class GetFailures(LogCaptureTestCase):
         _assert_correct_last_attempt(self, filter_, output)
     def testGetFailuresMultiRegex(self):
-        output = ('141.3.81.106', 8, 1124013541.0)
+        output = [
+            ('141.3.81.106', 8, 1124013541.0)
+        ]
+        self.filter.setMaxRetry(8)
         self.filter.addLogPath(GetFailures.FILENAME_02, autoSeek=False)
         self.filter.addFailRegex(r"Failed .* from <HOST>")
         self.filter.addFailRegex(r"Accepted .* from <HOST>")
@@ -1800,26 +1813,25 @@ class GetFailures(LogCaptureTestCase):
         self.assertRaises(FailManagerEmpty, self.filter.failManager.toBan)
     def testGetFailuresMultiLine(self):
-        output = [("192.0.43.10", 2, 1124013599.0),
-            ("192.0.43.11", 1, 1124013598.0)]
+        output = [
+            ("192.0.43.10", 1, 1124013598.0),
+            ("192.0.43.10", 1, 1124013599.0),
+            ("192.0.43.11", 1, 1124013598.0)
+        ]
         self.filter.addLogPath(GetFailures.FILENAME_MULTILINE, autoSeek=False)
         self.filter.setMaxLines(100)
         self.filter.addFailRegex(r"^.*rsyncd\[(?P<pid>\d+)\]: connect from .+ \(<HOST>\)$<SKIPLINES>^.+ rsyncd\[(?P=pid)\]: rsync error: .*$")
         self.filter.setMaxRetry(1)
         self.filter.getFailures(GetFailures.FILENAME_MULTILINE)
-        foundList = []
-        while True:
-            try:
-                foundList.append(
-                    _ticket_tuple(self.filter.failManager.toBan())[0:3])
-            except FailManagerEmpty:
-                break
-        self.assertSortedEqual(foundList, output)
+        _assert_correct_last_attempt(self, self.filter, output)
     def testGetFailuresMultiLineIgnoreRegex(self):
-        output = [("192.0.43.10", 2, 1124013599.0)]
+        output = [
+            ("192.0.43.10", 1, 1124013598.0),
+            ("192.0.43.10", 1, 1124013599.0)
+        ]
         self.filter.addLogPath(GetFailures.FILENAME_MULTILINE, autoSeek=False)
         self.filter.setMaxLines(100)
         self.filter.addFailRegex(r"^.*rsyncd\[(?P<pid>\d+)\]: connect from .+ \(<HOST>\)$<SKIPLINES>^.+ rsyncd\[(?P=pid)\]: rsync error: .*$")
@@ -1828,14 +1840,17 @@ class GetFailures(LogCaptureTestCase):
         self.filter.getFailures(GetFailures.FILENAME_MULTILINE)
-        _assert_correct_last_attempt(self, self.filter, output.pop())
+        _assert_correct_last_attempt(self, self.filter, output)
         self.assertRaises(FailManagerEmpty, self.filter.failManager.toBan)
     def testGetFailuresMultiLineMultiRegex(self):
-        output = [("192.0.43.10", 2, 1124013599.0),
+        output = [
+            ("192.0.43.10", 1, 1124013598.0),
+            ("192.0.43.10", 1, 1124013599.0),
             ("192.0.43.11", 1, 1124013598.0),
-            ("192.0.43.15", 1, 1124013598.0)]
+            ("192.0.43.15", 1, 1124013598.0)
+        ]
         self.filter.addLogPath(GetFailures.FILENAME_MULTILINE, autoSeek=False)
         self.filter.setMaxLines(100)
         self.filter.addFailRegex(r"^.*rsyncd\[(?P<pid>\d+)\]: connect from .+ \(<HOST>\)$<SKIPLINES>^.+ rsyncd\[(?P=pid)\]: rsync error: .*$")
@@ -1844,14 +1859,9 @@ class GetFailures(LogCaptureTestCase):
         self.filter.getFailures(GetFailures.FILENAME_MULTILINE)
-        foundList = []
-        while True:
-            try:
-                foundList.append(
-                    _ticket_tuple(self.filter.failManager.toBan())[0:3])
-            except FailManagerEmpty:
-                break
-        self.assertSortedEqual(foundList, output)
+        _assert_correct_last_attempt(self, self.filter, output)
+        self.assertRaises(FailManagerEmpty, self.filter.failManager.toBan)
class DNSUtilsTests(unittest.TestCase):
