Merge pull request #1187 from yarikoptic/bf-nasty-children-tests

BF+RF: executeCmd should only return bool status on success and not throw exceptions (when timedout/killed) (Closes #1155)
pull/1188/head
Yaroslav Halchenko 2015-09-15 08:56:59 -04:00
commit d60c52b84f
3 changed files with 37 additions and 34 deletions

View File

@ -14,6 +14,9 @@ ver. 0.9.4 (2015/XX/XXX) - wanna-be-released
* Fix dnsToIp resolver for fqdn with large list of IPs (gh-1164) * Fix dnsToIp resolver for fqdn with large list of IPs (gh-1164)
* filter.d/apache-badbots.conf * filter.d/apache-badbots.conf
- Updated useragent string regex adding escape for `+` - Updated useragent string regex adding escape for `+`
* Treat failed and killed execution of commands identically (only
different log messages), which addresses different behavior on different
exit codes of dash and bash (gh-1155)
- New Features: - New Features:

View File

@ -560,32 +560,33 @@ class CommandAction(ActionBase):
return True return True
_cmd_lock.acquire() _cmd_lock.acquire()
try: # Try wrapped within another try needed for python version < 2.5 try:
retcode = None # to guarantee being defined upon early except
stdout = tempfile.TemporaryFile(suffix=".stdout", prefix="fai2ban_") stdout = tempfile.TemporaryFile(suffix=".stdout", prefix="fai2ban_")
stderr = tempfile.TemporaryFile(suffix=".stderr", prefix="fai2ban_") stderr = tempfile.TemporaryFile(suffix=".stderr", prefix="fai2ban_")
try:
popen = subprocess.Popen( popen = subprocess.Popen(
realCmd, stdout=stdout, stderr=stderr, shell=True, realCmd, stdout=stdout, stderr=stderr, shell=True,
preexec_fn=os.setsid # so that killpg does not kill our process preexec_fn=os.setsid # so that killpg does not kill our process
) )
stime = time.time() stime = time.time()
retcode = popen.poll()
while time.time() - stime <= timeout and retcode is None:
time.sleep(0.1)
retcode = popen.poll() retcode = popen.poll()
while time.time() - stime <= timeout and retcode is None: if retcode is None:
logSys.error("%s -- timed out after %i seconds." %
(realCmd, timeout))
pgid = os.getpgid(popen.pid)
os.killpg(pgid, signal.SIGTERM) # Terminate the process
time.sleep(0.1)
retcode = popen.poll()
if retcode is None: # Still going...
os.killpg(pgid, signal.SIGKILL) # Kill the process
time.sleep(0.1) time.sleep(0.1)
retcode = popen.poll() retcode = popen.poll()
if retcode is None: except OSError as e:
logSys.error("%s -- timed out after %i seconds." % logSys.error("%s -- failed with %s" % (realCmd, e))
(realCmd, timeout))
pgid = os.getpgid(popen.pid)
os.killpg(pgid, signal.SIGTERM) # Terminate the process
time.sleep(0.1)
retcode = popen.poll()
if retcode is None: # Still going...
os.killpg(pgid, signal.SIGKILL) # Kill the process
time.sleep(0.1)
retcode = popen.poll()
except OSError, e:
logSys.error("%s -- failed with %s" % (realCmd, e))
finally: finally:
_cmd_lock.release() _cmd_lock.release()
@ -603,15 +604,16 @@ class CommandAction(ActionBase):
return True return True
elif retcode is None: elif retcode is None:
logSys.error("%s -- unable to kill PID %i" % (realCmd, popen.pid)) logSys.error("%s -- unable to kill PID %i" % (realCmd, popen.pid))
elif retcode < 0: elif retcode < 0 or retcode > 128:
logSys.error("%s -- killed with %s" % # dash would return negative while bash 128 + n
(realCmd, signame.get(-retcode, "signal %i" % -retcode))) sigcode = -retcode if retcode < 0 else retcode - 128
logSys.error("%s -- killed with %s (return code: %s)" %
(realCmd, signame.get(sigcode, "signal %i" % sigcode), retcode))
else: else:
msg = _RETCODE_HINTS.get(retcode, None) msg = _RETCODE_HINTS.get(retcode, None)
logSys.error("%s -- returned %i" % (realCmd, retcode)) logSys.error("%s -- returned %i" % (realCmd, retcode))
if msg: if msg:
logSys.info("HINT on %i: %s" logSys.info("HINT on %i: %s"
% (retcode, msg % locals())) % (retcode, msg % locals()))
return False return False
raise RuntimeError("Command execution failed: %s" % realCmd)

View File

@ -196,11 +196,10 @@ class CommandActionTest(LogCaptureTestCase):
def testExecuteTimeout(self): def testExecuteTimeout(self):
stime = time.time() stime = time.time()
# Should take a minute # Should take a minute
self.assertRaises( self.assertFalse(CommandAction.executeCmd('sleep 60', timeout=2))
RuntimeError, CommandAction.executeCmd, 'sleep 60', timeout=2)
# give a test still 1 second, because system could be too busy # give a test still 1 second, because system could be too busy
self.assertTrue(time.time() >= stime + 2 and time.time() <= stime + 3) self.assertTrue(time.time() >= stime + 2 and time.time() <= stime + 3)
self.assertTrue(self._is_logged('sleep 60 -- timed out after 2 seconds') self.assertTrue(self._is_logged('sleep 60 -- timed out after 2 seconds')
or self._is_logged('sleep 60 -- timed out after 3 seconds')) or self._is_logged('sleep 60 -- timed out after 3 seconds'))
self.assertTrue(self._is_logged('sleep 60 -- killed with SIGTERM')) self.assertTrue(self._is_logged('sleep 60 -- killed with SIGTERM'))
@ -222,17 +221,16 @@ class CommandActionTest(LogCaptureTestCase):
return int(f.read()) return int(f.read())
# First test if can kill the bastard # First test if can kill the bastard
self.assertRaises( self.assertFalse(CommandAction.executeCmd(
RuntimeError, CommandAction.executeCmd, 'bash %s' % tmpFilename, timeout=.1) 'bash %s' % tmpFilename, timeout=.1))
# Verify that the process itself got killed # Verify that the process itself got killed
self.assertFalse(pid_exists(getnastypid())) # process should have been killed self.assertFalse(pid_exists(getnastypid())) # process should have been killed
self.assertTrue(self._is_logged('timed out')) self.assertTrue(self._is_logged('timed out'))
self.assertTrue(self._is_logged('killed with SIGTERM')) self.assertTrue(self._is_logged('killed with SIGTERM'))
# A bit evolved case even though, previous test already tests killing children processes # A bit evolved case even though, previous test already tests killing children processes
self.assertRaises( self.assertFalse(CommandAction.executeCmd(
RuntimeError, CommandAction.executeCmd, 'out=`bash %s`; echo ALRIGHT' % tmpFilename, 'out=`bash %s`; echo ALRIGHT' % tmpFilename, timeout=.2))
timeout=.2)
# Verify that the process itself got killed # Verify that the process itself got killed
self.assertFalse(pid_exists(getnastypid())) self.assertFalse(pid_exists(getnastypid()))
self.assertTrue(self._is_logged('timed out')) self.assertTrue(self._is_logged('timed out'))