Move handling of unicode decoding to FileContainer readline

Also print warning for unicode decode failure, leaving as str in python2
and ignoring erroneous characters in python3
pull/128/merge^2
Steven Hiscocks 12 years ago
parent 7e1819ed65
commit d23d365be2

@ -289,12 +289,6 @@ class Filter(JailThread):
def processLine(self, line):
"""Split the time portion from log msg and return findFailures on them
"""
if not isinstance(line, unicode):
try:
# Decode line to UTF-8
line = line.decode('utf-8')
except UnicodeDecodeError:
pass
timeMatch = self.dateDetector.matchTime(line)
if timeMatch:
# Lets split into time part and log part of the line
@ -485,7 +479,7 @@ class FileFilter(Filter):
while True:
line = container.readline()
if (line == "") or not self._isActive():
if not line or not self._isActive():
# The jail reached the bottom or has been stopped
break
self.processLineAndAdd(line)
@ -521,19 +515,13 @@ class FileContainer:
self.__tail = tail
self.__handler = None
# Try to open the file. Raises an exception if an error occured.
if sys.version_info >= (3,):
handler = open(filename, encoding='utf-8', errors='ignore')
else:
handler = open(filename)
handler = open(filename, 'rb')
stats = os.fstat(handler.fileno())
self.__ino = stats.st_ino
try:
firstLine = handler.readline()
# Computes the MD5 of the first line.
if isinstance(firstLine, unicode):
self.__hash = md5sum(firstLine.encode('utf-8')).digest()
else:
self.__hash = md5sum(firstLine).digest()
self.__hash = md5sum(firstLine).digest()
# Start at the beginning of file if tail mode is off.
if tail:
handler.seek(0, 2)
@ -547,20 +535,13 @@ class FileContainer:
return self.__filename
def open(self):
if sys.version_info >= (3,):
self.__handler = open(
self.__filename, encoding='utf-8', errors='ignore')
else:
self.__handler = open(self.__filename)
self.__handler = open(self.__filename, 'rb')
# Set the file descriptor to be FD_CLOEXEC
fd = self.__handler.fileno()
fcntl.fcntl(fd, fcntl.F_SETFD, fd | fcntl.FD_CLOEXEC)
firstLine = self.__handler.readline()
# Computes the MD5 of the first line.
if isinstance(firstLine, unicode):
myHash = md5sum(firstLine.encode('utf-8')).digest()
else:
myHash = md5sum(firstLine).digest()
myHash = md5sum(firstLine).digest()
stats = os.fstat(self.__handler.fileno())
# Compare hash and inode
if self.__hash != myHash or self.__ino != stats.st_ino:
@ -574,7 +555,14 @@ class FileContainer:
def readline(self):
if self.__handler == None:
return ""
return self.__handler.readline()
line = self.__handler.readline()
try:
line = line.decode('utf-8', 'strict')
except UnicodeDecodeError:
logSys.warn("Error decoding line to utf-8: %s" % `line`)
if sys.version_info >= (3,): # In python3, must be unicode
line = line.decode('utf-8', 'ignore')
return line
def close(self):
if not self.__handler == None:

@ -508,7 +508,7 @@ class GetFailures(unittest.TestCase):
# so that they could be reused by other tests
FAILURES_01 = ('193.168.0.128', 3, 1124013599.0,
['Aug 14 11:59:59 [sshd] error: PAM: Authentication failure for kevin from 193.168.0.128\n']*3)
[u'Aug 14 11:59:59 [sshd] error: PAM: Authentication failure for kevin from 193.168.0.128\n']*3)
def setUp(self):
"""Call before every test case."""
@ -532,7 +532,7 @@ class GetFailures(unittest.TestCase):
def testGetFailures02(self):
output = ('141.3.81.106', 4, 1124013539.0,
['Aug 14 11:%d:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:141.3.81.106 port 51332 ssh2\n'
[u'Aug 14 11:%d:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:141.3.81.106 port 51332 ssh2\n'
% m for m in 53, 54, 57, 58])
self.filter.addLogPath(GetFailures.FILENAME_02)
@ -565,11 +565,11 @@ class GetFailures(unittest.TestCase):
def testGetFailuresUseDNS(self):
# We should still catch failures with usedns = no ;-)
output_yes = ('192.0.43.10', 2, 1124013539.0,
['Aug 14 11:54:59 i60p295 sshd[12365]: Failed publickey for roehl from example.com port 51332 ssh2\n',
'Aug 14 11:58:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:192.0.43.10 port 51332 ssh2\n'])
[u'Aug 14 11:54:59 i60p295 sshd[12365]: Failed publickey for roehl from example.com port 51332 ssh2\n',
u'Aug 14 11:58:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:192.0.43.10 port 51332 ssh2\n'])
output_no = ('192.0.43.10', 1, 1124013539.0,
['Aug 14 11:58:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:192.0.43.10 port 51332 ssh2\n'])
[u'Aug 14 11:58:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:192.0.43.10 port 51332 ssh2\n'])
# Actually no exception would be raised -- it will be just set to 'no'
#self.assertRaises(ValueError,

Loading…
Cancel
Save