mirror of https://github.com/fail2ban/fail2ban
readline fixed to consider interim new-line character as part of code point in multi-byte logs (e. g. unicode: utf-16be, utf-16le);
suppress warning "Error decoding line" for incomplete line (produced by not fully read multi-byte new-line character at end of data); added test coverage for such logspull/2337/merge
parent
996920cdaa
commit
cbac7c176a
|
@ -1398,6 +1398,9 @@ class FileContainer:
|
||||||
try:
|
try:
|
||||||
return line.decode(enc, 'strict')
|
return line.decode(enc, 'strict')
|
||||||
except (UnicodeDecodeError, UnicodeEncodeError) as e:
|
except (UnicodeDecodeError, UnicodeEncodeError) as e:
|
||||||
|
# avoid warning if got incomplete end of line (e. g. '\n' in "...[0A" followed by "00]..." for utf-16le:
|
||||||
|
if (e.end == len(line) and line[e.start] in b'\r\n'):
|
||||||
|
return line[0:e.start].decode(enc, 'replace')
|
||||||
global _decode_line_warn
|
global _decode_line_warn
|
||||||
lev = 7
|
lev = 7
|
||||||
if not _decode_line_warn.get(filename, 0):
|
if not _decode_line_warn.get(filename, 0):
|
||||||
|
@ -1406,9 +1409,9 @@ class FileContainer:
|
||||||
logSys.log(lev,
|
logSys.log(lev,
|
||||||
"Error decoding line from '%s' with '%s'.", filename, enc)
|
"Error decoding line from '%s' with '%s'.", filename, enc)
|
||||||
if logSys.getEffectiveLevel() <= lev:
|
if logSys.getEffectiveLevel() <= lev:
|
||||||
logSys.log(lev, "Consider setting logencoding=utf-8 (or another appropriate"
|
logSys.log(lev,
|
||||||
" encoding) for this jail. Continuing"
|
"Consider setting logencoding to appropriate encoding for this jail. "
|
||||||
" to process line ignoring invalid characters: %r",
|
"Continuing to process line ignoring invalid characters: %r",
|
||||||
line)
|
line)
|
||||||
# decode with replacing error chars:
|
# decode with replacing error chars:
|
||||||
line = line.decode(enc, 'replace')
|
line = line.decode(enc, 'replace')
|
||||||
|
@ -1422,23 +1425,51 @@ class FileContainer:
|
||||||
if line is not complete (and complete=True) or there is no content to read.
|
if line is not complete (and complete=True) or there is no content to read.
|
||||||
If line is complete (and complete is True), it also shift current known
|
If line is complete (and complete is True), it also shift current known
|
||||||
position to begin of next line.
|
position to begin of next line.
|
||||||
|
|
||||||
|
Also it is safe against interim new-line bytes (e. g. part of multi-byte char)
|
||||||
|
in given encoding.
|
||||||
"""
|
"""
|
||||||
if self.__handler is None:
|
if self.__handler is None:
|
||||||
return ""
|
return ""
|
||||||
rl = self.__handler.readline()
|
# read raw bytes up to \n char:
|
||||||
if rl == b'':
|
b = self.__handler.readline()
|
||||||
|
if not b:
|
||||||
return None
|
return None
|
||||||
# trim new-line here and check the line was written complete (contains a new-line):
|
bl = len(b)
|
||||||
l = rl.rstrip(b'\r\n')
|
# convert to log-encoding (new-line char could disappear if it is part of multi-byte sequence):
|
||||||
if self.inOperation and complete:
|
r = FileContainer.decode_line(
|
||||||
if l == rl:
|
self.getFileName(), self.getEncoding(), b)
|
||||||
|
# trim new-line at end and check the line was written complete (contains a new-line):
|
||||||
|
l = r.rstrip('\r\n')
|
||||||
|
if complete:
|
||||||
|
if l == r:
|
||||||
|
# try to fill buffer in order to find line-end in log encoding:
|
||||||
|
fnd = 0
|
||||||
|
while 1:
|
||||||
|
r = self.__handler.readline()
|
||||||
|
if not r:
|
||||||
|
break
|
||||||
|
b += r
|
||||||
|
bl += len(r)
|
||||||
|
# convert to log-encoding:
|
||||||
|
r = FileContainer.decode_line(
|
||||||
|
self.getFileName(), self.getEncoding(), b)
|
||||||
|
# ensure new-line is not in the middle (buffered 2 strings, e. g. in utf-16le it is "...[0A"+"00]..."):
|
||||||
|
e = r.find('\n')
|
||||||
|
if e >= 0 and e != len(r)-1:
|
||||||
|
l, r = r[0:e], r[0:e+1]
|
||||||
|
# back to bytes and get offset to seek after NL:
|
||||||
|
r = r.encode(self.getEncoding(), 'replace')
|
||||||
|
self.__handler.seek(-bl+len(r), 1)
|
||||||
|
return l
|
||||||
|
# trim new-line at end and check the line was written complete (contains a new-line):
|
||||||
|
l = r.rstrip('\r\n')
|
||||||
|
if l != r:
|
||||||
|
return l
|
||||||
# not fulfilled - seek back and return:
|
# not fulfilled - seek back and return:
|
||||||
self.__handler.seek(self.__pos, 0)
|
self.__handler.seek(-bl, 1)
|
||||||
return None
|
return None
|
||||||
# shift position (to be able to seek back above):
|
return l
|
||||||
self.__pos += len(rl)
|
|
||||||
return FileContainer.decode_line(
|
|
||||||
self.getFileName(), self.getEncoding(), l)
|
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
if self.__handler is not None:
|
if self.__handler is not None:
|
||||||
|
|
|
@ -644,6 +644,19 @@ class LogFile(LogCaptureTestCase):
|
||||||
self.filter = FilterPoll(None)
|
self.filter = FilterPoll(None)
|
||||||
self.assertRaises(IOError, self.filter.addLogPath, LogFile.MISSING)
|
self.assertRaises(IOError, self.filter.addLogPath, LogFile.MISSING)
|
||||||
|
|
||||||
|
def testDecodeLineWarn(self):
|
||||||
|
# incomplete line (missing byte at end), warning is suppressed:
|
||||||
|
l = u"correct line\n"
|
||||||
|
r = l.encode('utf-16le')
|
||||||
|
self.assertEqual(FileContainer.decode_line('TESTFILE', 'utf-16le', r), l)
|
||||||
|
self.assertEqual(FileContainer.decode_line('TESTFILE', 'utf-16le', r[0:-1]), l[0:-1])
|
||||||
|
self.assertNotLogged('Error decoding line')
|
||||||
|
# complete line (incorrect surrogate in the middle), warning is there:
|
||||||
|
r = b"incorrect \xc8\x0a line\n"
|
||||||
|
l = r.decode('utf-8', 'replace')
|
||||||
|
self.assertEqual(FileContainer.decode_line('TESTFILE', 'utf-8', r), l)
|
||||||
|
self.assertLogged('Error decoding line')
|
||||||
|
|
||||||
|
|
||||||
class LogFileFilterPoll(unittest.TestCase):
|
class LogFileFilterPoll(unittest.TestCase):
|
||||||
|
|
||||||
|
@ -1633,6 +1646,7 @@ class GetFailures(LogCaptureTestCase):
|
||||||
def testCRLFFailures01(self):
|
def testCRLFFailures01(self):
|
||||||
# We first adjust logfile/failures to end with CR+LF
|
# We first adjust logfile/failures to end with CR+LF
|
||||||
fname = tempfile.mktemp(prefix='tmp_fail2ban', suffix='crlf')
|
fname = tempfile.mktemp(prefix='tmp_fail2ban', suffix='crlf')
|
||||||
|
try:
|
||||||
# poor man unix2dos:
|
# poor man unix2dos:
|
||||||
fin, fout = open(GetFailures.FILENAME_01, 'rb'), open(fname, 'wb')
|
fin, fout = open(GetFailures.FILENAME_01, 'rb'), open(fname, 'wb')
|
||||||
for l in fin.read().splitlines():
|
for l in fin.read().splitlines():
|
||||||
|
@ -1642,8 +1656,40 @@ class GetFailures(LogCaptureTestCase):
|
||||||
|
|
||||||
# now see if we should be getting the "same" failures
|
# now see if we should be getting the "same" failures
|
||||||
self.testGetFailures01(filename=fname)
|
self.testGetFailures01(filename=fname)
|
||||||
|
finally:
|
||||||
_killfile(fout, fname)
|
_killfile(fout, fname)
|
||||||
|
|
||||||
|
def testNLCharAsPartOfUniChar(self):
|
||||||
|
fname = tempfile.mktemp(prefix='tmp_fail2ban', suffix='crlf')
|
||||||
|
# test two multi-byte encodings (both contains `\x0A` in either \x02\x0A or \x0A\x02):
|
||||||
|
for enc in ('utf-16be', 'utf-16le'):
|
||||||
|
self.pruneLog("[test-phase encoding=%s]" % enc)
|
||||||
|
try:
|
||||||
|
fout = open(fname, 'wb')
|
||||||
|
tm = int(time.time())
|
||||||
|
# test on unicode string containing \x0A as part of uni-char,
|
||||||
|
# it must produce exactly 2 lines (both are failures):
|
||||||
|
for l in (
|
||||||
|
u'%s \u20AC Failed auth: invalid user Test\u020A from 192.0.2.1\n' % tm,
|
||||||
|
u'%s \u20AC Failed auth: invalid user TestI from 192.0.2.2\n' % tm
|
||||||
|
):
|
||||||
|
fout.write(l.encode(enc))
|
||||||
|
fout.close()
|
||||||
|
|
||||||
|
self.filter.setLogEncoding(enc)
|
||||||
|
self.filter.addLogPath(fname, autoSeek=0)
|
||||||
|
self.filter.setDatePattern((r'^EPOCH',))
|
||||||
|
self.filter.addFailRegex(r"Failed .* from <HOST>")
|
||||||
|
self.filter.getFailures(fname)
|
||||||
|
self.assertLogged(
|
||||||
|
"[DummyJail] Found 192.0.2.1",
|
||||||
|
"[DummyJail] Found 192.0.2.2", all=True, wait=True)
|
||||||
|
finally:
|
||||||
|
_killfile(fout, fname)
|
||||||
|
self.filter.delLogPath(fname)
|
||||||
|
# must find 4 failures and generate 2 tickets (2 IPs with each 2 failures):
|
||||||
|
self.assertEqual(self.filter.failManager.getFailCount(), (2, 4))
|
||||||
|
|
||||||
def testGetFailures02(self):
|
def testGetFailures02(self):
|
||||||
output = ('141.3.81.106', 4, 1124013539.0,
|
output = ('141.3.81.106', 4, 1124013539.0,
|
||||||
[u'Aug 14 11:%d:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:141.3.81.106 port 51332 ssh2'
|
[u'Aug 14 11:%d:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:141.3.81.106 port 51332 ssh2'
|
||||||
|
|
Loading…
Reference in New Issue