mirror of https://github.com/fail2ban/fail2ban
readline fixed to consider interim new-line character as part of code point in multi-byte logs (e. g. unicode: utf-16be, utf-16le);
suppress warning "Error decoding line" for incomplete line (produced by not fully read multi-byte new-line character at end of data); added test coverage for such logspull/2337/merge
parent
996920cdaa
commit
cbac7c176a
|
@ -1398,6 +1398,9 @@ class FileContainer:
|
|||
try:
|
||||
return line.decode(enc, 'strict')
|
||||
except (UnicodeDecodeError, UnicodeEncodeError) as e:
|
||||
# avoid warning if got incomplete end of line (e. g. '\n' in "...[0A" followed by "00]..." for utf-16le:
|
||||
if (e.end == len(line) and line[e.start] in b'\r\n'):
|
||||
return line[0:e.start].decode(enc, 'replace')
|
||||
global _decode_line_warn
|
||||
lev = 7
|
||||
if not _decode_line_warn.get(filename, 0):
|
||||
|
@ -1406,9 +1409,9 @@ class FileContainer:
|
|||
logSys.log(lev,
|
||||
"Error decoding line from '%s' with '%s'.", filename, enc)
|
||||
if logSys.getEffectiveLevel() <= lev:
|
||||
logSys.log(lev, "Consider setting logencoding=utf-8 (or another appropriate"
|
||||
" encoding) for this jail. Continuing"
|
||||
" to process line ignoring invalid characters: %r",
|
||||
logSys.log(lev,
|
||||
"Consider setting logencoding to appropriate encoding for this jail. "
|
||||
"Continuing to process line ignoring invalid characters: %r",
|
||||
line)
|
||||
# decode with replacing error chars:
|
||||
line = line.decode(enc, 'replace')
|
||||
|
@ -1422,23 +1425,51 @@ class FileContainer:
|
|||
if line is not complete (and complete=True) or there is no content to read.
|
||||
If line is complete (and complete is True), it also shift current known
|
||||
position to begin of next line.
|
||||
|
||||
Also it is safe against interim new-line bytes (e. g. part of multi-byte char)
|
||||
in given encoding.
|
||||
"""
|
||||
if self.__handler is None:
|
||||
return ""
|
||||
rl = self.__handler.readline()
|
||||
if rl == b'':
|
||||
# read raw bytes up to \n char:
|
||||
b = self.__handler.readline()
|
||||
if not b:
|
||||
return None
|
||||
# trim new-line here and check the line was written complete (contains a new-line):
|
||||
l = rl.rstrip(b'\r\n')
|
||||
if self.inOperation and complete:
|
||||
if l == rl:
|
||||
bl = len(b)
|
||||
# convert to log-encoding (new-line char could disappear if it is part of multi-byte sequence):
|
||||
r = FileContainer.decode_line(
|
||||
self.getFileName(), self.getEncoding(), b)
|
||||
# trim new-line at end and check the line was written complete (contains a new-line):
|
||||
l = r.rstrip('\r\n')
|
||||
if complete:
|
||||
if l == r:
|
||||
# try to fill buffer in order to find line-end in log encoding:
|
||||
fnd = 0
|
||||
while 1:
|
||||
r = self.__handler.readline()
|
||||
if not r:
|
||||
break
|
||||
b += r
|
||||
bl += len(r)
|
||||
# convert to log-encoding:
|
||||
r = FileContainer.decode_line(
|
||||
self.getFileName(), self.getEncoding(), b)
|
||||
# ensure new-line is not in the middle (buffered 2 strings, e. g. in utf-16le it is "...[0A"+"00]..."):
|
||||
e = r.find('\n')
|
||||
if e >= 0 and e != len(r)-1:
|
||||
l, r = r[0:e], r[0:e+1]
|
||||
# back to bytes and get offset to seek after NL:
|
||||
r = r.encode(self.getEncoding(), 'replace')
|
||||
self.__handler.seek(-bl+len(r), 1)
|
||||
return l
|
||||
# trim new-line at end and check the line was written complete (contains a new-line):
|
||||
l = r.rstrip('\r\n')
|
||||
if l != r:
|
||||
return l
|
||||
# not fulfilled - seek back and return:
|
||||
self.__handler.seek(self.__pos, 0)
|
||||
self.__handler.seek(-bl, 1)
|
||||
return None
|
||||
# shift position (to be able to seek back above):
|
||||
self.__pos += len(rl)
|
||||
return FileContainer.decode_line(
|
||||
self.getFileName(), self.getEncoding(), l)
|
||||
return l
|
||||
|
||||
def close(self):
|
||||
if self.__handler is not None:
|
||||
|
|
|
@ -644,6 +644,19 @@ class LogFile(LogCaptureTestCase):
|
|||
self.filter = FilterPoll(None)
|
||||
self.assertRaises(IOError, self.filter.addLogPath, LogFile.MISSING)
|
||||
|
||||
def testDecodeLineWarn(self):
|
||||
# incomplete line (missing byte at end), warning is suppressed:
|
||||
l = u"correct line\n"
|
||||
r = l.encode('utf-16le')
|
||||
self.assertEqual(FileContainer.decode_line('TESTFILE', 'utf-16le', r), l)
|
||||
self.assertEqual(FileContainer.decode_line('TESTFILE', 'utf-16le', r[0:-1]), l[0:-1])
|
||||
self.assertNotLogged('Error decoding line')
|
||||
# complete line (incorrect surrogate in the middle), warning is there:
|
||||
r = b"incorrect \xc8\x0a line\n"
|
||||
l = r.decode('utf-8', 'replace')
|
||||
self.assertEqual(FileContainer.decode_line('TESTFILE', 'utf-8', r), l)
|
||||
self.assertLogged('Error decoding line')
|
||||
|
||||
|
||||
class LogFileFilterPoll(unittest.TestCase):
|
||||
|
||||
|
@ -1633,16 +1646,49 @@ class GetFailures(LogCaptureTestCase):
|
|||
def testCRLFFailures01(self):
|
||||
# We first adjust logfile/failures to end with CR+LF
|
||||
fname = tempfile.mktemp(prefix='tmp_fail2ban', suffix='crlf')
|
||||
# poor man unix2dos:
|
||||
fin, fout = open(GetFailures.FILENAME_01, 'rb'), open(fname, 'wb')
|
||||
for l in fin.read().splitlines():
|
||||
fout.write(l + b'\r\n')
|
||||
fin.close()
|
||||
fout.close()
|
||||
try:
|
||||
# poor man unix2dos:
|
||||
fin, fout = open(GetFailures.FILENAME_01, 'rb'), open(fname, 'wb')
|
||||
for l in fin.read().splitlines():
|
||||
fout.write(l + b'\r\n')
|
||||
fin.close()
|
||||
fout.close()
|
||||
|
||||
# now see if we should be getting the "same" failures
|
||||
self.testGetFailures01(filename=fname)
|
||||
_killfile(fout, fname)
|
||||
# now see if we should be getting the "same" failures
|
||||
self.testGetFailures01(filename=fname)
|
||||
finally:
|
||||
_killfile(fout, fname)
|
||||
|
||||
def testNLCharAsPartOfUniChar(self):
|
||||
fname = tempfile.mktemp(prefix='tmp_fail2ban', suffix='crlf')
|
||||
# test two multi-byte encodings (both contains `\x0A` in either \x02\x0A or \x0A\x02):
|
||||
for enc in ('utf-16be', 'utf-16le'):
|
||||
self.pruneLog("[test-phase encoding=%s]" % enc)
|
||||
try:
|
||||
fout = open(fname, 'wb')
|
||||
tm = int(time.time())
|
||||
# test on unicode string containing \x0A as part of uni-char,
|
||||
# it must produce exactly 2 lines (both are failures):
|
||||
for l in (
|
||||
u'%s \u20AC Failed auth: invalid user Test\u020A from 192.0.2.1\n' % tm,
|
||||
u'%s \u20AC Failed auth: invalid user TestI from 192.0.2.2\n' % tm
|
||||
):
|
||||
fout.write(l.encode(enc))
|
||||
fout.close()
|
||||
|
||||
self.filter.setLogEncoding(enc)
|
||||
self.filter.addLogPath(fname, autoSeek=0)
|
||||
self.filter.setDatePattern((r'^EPOCH',))
|
||||
self.filter.addFailRegex(r"Failed .* from <HOST>")
|
||||
self.filter.getFailures(fname)
|
||||
self.assertLogged(
|
||||
"[DummyJail] Found 192.0.2.1",
|
||||
"[DummyJail] Found 192.0.2.2", all=True, wait=True)
|
||||
finally:
|
||||
_killfile(fout, fname)
|
||||
self.filter.delLogPath(fname)
|
||||
# must find 4 failures and generate 2 tickets (2 IPs with each 2 failures):
|
||||
self.assertEqual(self.filter.failManager.getFailCount(), (2, 4))
|
||||
|
||||
def testGetFailures02(self):
|
||||
output = ('141.3.81.106', 4, 1124013539.0,
|
||||
|
|
Loading…
Reference in New Issue