From d23d365be2b2027ab745e2222d3ae48550256339 Mon Sep 17 00:00:00 2001 From: Steven Hiscocks Date: Mon, 25 Feb 2013 22:45:16 +0000 Subject: [PATCH] Move handling of unicode decoding to FileContainer readline Also print warning for unicode decode failure, leaving as str in python2 and ignoring erroneous characters in python3 --- server/filter.py | 38 +++++++++++++------------------------ testcases/filtertestcase.py | 10 +++++----- 2 files changed, 18 insertions(+), 30 deletions(-) diff --git a/server/filter.py b/server/filter.py index 6a2cf342..3f507cd1 100644 --- a/server/filter.py +++ b/server/filter.py @@ -289,12 +289,6 @@ class Filter(JailThread): def processLine(self, line): """Split the time portion from log msg and return findFailures on them """ - if not isinstance(line, unicode): - try: - # Decode line to UTF-8 - line = line.decode('utf-8') - except UnicodeDecodeError: - pass timeMatch = self.dateDetector.matchTime(line) if timeMatch: # Lets split into time part and log part of the line @@ -485,7 +479,7 @@ class FileFilter(Filter): while True: line = container.readline() - if (line == "") or not self._isActive(): + if not line or not self._isActive(): # The jail reached the bottom or has been stopped break self.processLineAndAdd(line) @@ -521,19 +515,13 @@ class FileContainer: self.__tail = tail self.__handler = None # Try to open the file. Raises an exception if an error occured. - if sys.version_info >= (3,): - handler = open(filename, encoding='utf-8', errors='ignore') - else: - handler = open(filename) + handler = open(filename, 'rb') stats = os.fstat(handler.fileno()) self.__ino = stats.st_ino try: firstLine = handler.readline() # Computes the MD5 of the first line. - if isinstance(firstLine, unicode): - self.__hash = md5sum(firstLine.encode('utf-8')).digest() - else: - self.__hash = md5sum(firstLine).digest() + self.__hash = md5sum(firstLine).digest() # Start at the beginning of file if tail mode is off. if tail: handler.seek(0, 2) @@ -547,20 +535,13 @@ class FileContainer: return self.__filename def open(self): - if sys.version_info >= (3,): - self.__handler = open( - self.__filename, encoding='utf-8', errors='ignore') - else: - self.__handler = open(self.__filename) + self.__handler = open(self.__filename, 'rb') # Set the file descriptor to be FD_CLOEXEC fd = self.__handler.fileno() fcntl.fcntl(fd, fcntl.F_SETFD, fd | fcntl.FD_CLOEXEC) firstLine = self.__handler.readline() # Computes the MD5 of the first line. - if isinstance(firstLine, unicode): - myHash = md5sum(firstLine.encode('utf-8')).digest() - else: - myHash = md5sum(firstLine).digest() + myHash = md5sum(firstLine).digest() stats = os.fstat(self.__handler.fileno()) # Compare hash and inode if self.__hash != myHash or self.__ino != stats.st_ino: @@ -574,7 +555,14 @@ class FileContainer: def readline(self): if self.__handler == None: return "" - return self.__handler.readline() + line = self.__handler.readline() + try: + line = line.decode('utf-8', 'strict') + except UnicodeDecodeError: + logSys.warn("Error decoding line to utf-8: %s" % `line`) + if sys.version_info >= (3,): # In python3, must be unicode + line = line.decode('utf-8', 'ignore') + return line def close(self): if not self.__handler == None: diff --git a/testcases/filtertestcase.py b/testcases/filtertestcase.py index d9e859a3..58384449 100644 --- a/testcases/filtertestcase.py +++ b/testcases/filtertestcase.py @@ -508,7 +508,7 @@ class GetFailures(unittest.TestCase): # so that they could be reused by other tests FAILURES_01 = ('193.168.0.128', 3, 1124013599.0, - ['Aug 14 11:59:59 [sshd] error: PAM: Authentication failure for kevin from 193.168.0.128\n']*3) + [u'Aug 14 11:59:59 [sshd] error: PAM: Authentication failure for kevin from 193.168.0.128\n']*3) def setUp(self): """Call before every test case.""" @@ -532,7 +532,7 @@ class GetFailures(unittest.TestCase): def testGetFailures02(self): output = ('141.3.81.106', 4, 1124013539.0, - ['Aug 14 11:%d:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:141.3.81.106 port 51332 ssh2\n' + [u'Aug 14 11:%d:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:141.3.81.106 port 51332 ssh2\n' % m for m in 53, 54, 57, 58]) self.filter.addLogPath(GetFailures.FILENAME_02) @@ -565,11 +565,11 @@ class GetFailures(unittest.TestCase): def testGetFailuresUseDNS(self): # We should still catch failures with usedns = no ;-) output_yes = ('192.0.43.10', 2, 1124013539.0, - ['Aug 14 11:54:59 i60p295 sshd[12365]: Failed publickey for roehl from example.com port 51332 ssh2\n', - 'Aug 14 11:58:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:192.0.43.10 port 51332 ssh2\n']) + [u'Aug 14 11:54:59 i60p295 sshd[12365]: Failed publickey for roehl from example.com port 51332 ssh2\n', + u'Aug 14 11:58:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:192.0.43.10 port 51332 ssh2\n']) output_no = ('192.0.43.10', 1, 1124013539.0, - ['Aug 14 11:58:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:192.0.43.10 port 51332 ssh2\n']) + [u'Aug 14 11:58:59 i60p295 sshd[12365]: Failed publickey for roehl from ::ffff:192.0.43.10 port 51332 ssh2\n']) # Actually no exception would be raised -- it will be just set to 'no' #self.assertRaises(ValueError,