From 7112e4f6c6e8174827e25776303e078add561a4c Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Sun, 26 Jul 2015 20:41:43 -0400 Subject: [PATCH 1/2] BF: kill the entire process group upon timeout (Close #1129) Requires also establishing a new process group for a child process, which changes previous behavior --- fail2ban/server/action.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fail2ban/server/action.py b/fail2ban/server/action.py index 60cb00e4..c58fde2c 100644 --- a/fail2ban/server/action.py +++ b/fail2ban/server/action.py @@ -565,7 +565,9 @@ class CommandAction(ActionBase): stderr = tempfile.TemporaryFile(suffix=".stderr", prefix="fai2ban_") try: popen = subprocess.Popen( - realCmd, stdout=stdout, stderr=stderr, shell=True) + realCmd, stdout=stdout, stderr=stderr, shell=True, + preexec_fn=os.setsid # so that killpg does not kill our process + ) stime = time.time() retcode = popen.poll() while time.time() - stime <= timeout and retcode is None: @@ -574,11 +576,12 @@ class CommandAction(ActionBase): if retcode is None: logSys.error("%s -- timed out after %i seconds." % (realCmd, timeout)) - os.kill(popen.pid, signal.SIGTERM) # Terminate the process + pgid = os.getpgid(popen.pid) + os.killpg(pgid, signal.SIGTERM) # Terminate the process time.sleep(0.1) retcode = popen.poll() if retcode is None: # Still going... - os.kill(popen.pid, signal.SIGKILL) # Kill the process + os.killpg(pgid, signal.SIGKILL) # Kill the process time.sleep(0.1) retcode = popen.poll() except OSError, e: From 515ad6dc12f8d29ccfe13767e40eb1bb4c21ac43 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Sun, 26 Jul 2015 21:18:36 -0400 Subject: [PATCH 2/2] TST: test to verify killing stuck children processes --- fail2ban/tests/actiontestcase.py | 42 +++++++++++++++++++++++++++++++- fail2ban/tests/utils.py | 27 ++++++++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/fail2ban/tests/actiontestcase.py b/fail2ban/tests/actiontestcase.py index 5850309e..febbc619 100644 --- a/fail2ban/tests/actiontestcase.py +++ b/fail2ban/tests/actiontestcase.py @@ -24,12 +24,14 @@ __author__ = "Cyril Jaquier" __copyright__ = "Copyright (c) 2004 Cyril Jaquier" __license__ = "GPL" +import os import time +import tempfile from ..server.action import CommandAction, CallingMap from .utils import LogCaptureTestCase - +from .utils import pid_exists class CommandActionTest(LogCaptureTestCase): @@ -202,6 +204,44 @@ class CommandActionTest(LogCaptureTestCase): or self._is_logged('sleep 60 -- timed out after 3 seconds')) self.assertTrue(self._is_logged('sleep 60 -- killed with SIGTERM')) + def testExecuteTimeoutWithNastyChildren(self): + # temporary file for a nasty kid shell script + tmpFilename = tempfile.mktemp(".sh", "fail2ban_") + # Create a nasty script which would hang there for a while + with open(tmpFilename, 'w') as f: + f.write("""#!/bin/bash + trap : HUP EXIT TERM + + echo "$$" > %s.pid + echo "my pid $$ . sleeping lo-o-o-ong" + sleep 10000 + """ % tmpFilename) + + def getnastypid(): + with open(tmpFilename + '.pid') as f: + return int(f.read()) + + # First test if can kill the bastard + self.assertRaises( + RuntimeError, CommandAction.executeCmd, 'bash %s' % tmpFilename, timeout=.1) + # Verify that the proccess itself got killed + self.assertFalse(pid_exists(getnastypid())) # process should have been killed + self.assertTrue(self._is_logged('timed out')) + self.assertTrue(self._is_logged('killed with SIGTERM')) + + # A bit evolved case even though, previous test already tests killing children processes + self.assertRaises( + RuntimeError, CommandAction.executeCmd, 'out=`bash %s`; echo ALRIGHT' % tmpFilename, + timeout=.2) + # Verify that the proccess itself got killed + self.assertFalse(pid_exists(getnastypid())) + self.assertTrue(self._is_logged('timed out')) + self.assertTrue(self._is_logged('killed with SIGTERM')) + + os.unlink(tmpFilename) + os.unlink(tmpFilename + '.pid') + + def testCaptureStdOutErr(self): CommandAction.executeCmd('echo "How now brown cow"') self.assertTrue(self._is_logged("'How now brown cow\\n'")) diff --git a/fail2ban/tests/utils.py b/fail2ban/tests/utils.py index 89539107..e7993ead 100644 --- a/fail2ban/tests/utils.py +++ b/fail2ban/tests/utils.py @@ -237,3 +237,30 @@ class LogCaptureTestCase(unittest.TestCase): def printLog(self): print(self._log.getvalue()) + +# Solution from http://stackoverflow.com/questions/568271/how-to-check-if-there-exists-a-process-with-a-given-pid +# under cc by-sa 3.0 +if os.name == 'posix': + def pid_exists(pid): + """Check whether pid exists in the current process table.""" + import errno + if pid < 0: + return False + try: + os.kill(pid, 0) + except OSError as e: + return e.errno == errno.EPERM + else: + return True +else: + def pid_exists(pid): + import ctypes + kernel32 = ctypes.windll.kernel32 + SYNCHRONIZE = 0x100000 + + process = kernel32.OpenProcess(SYNCHRONIZE, 0, pid) + if process != 0: + kernel32.CloseHandle(process) + return True + else: + return False \ No newline at end of file