Add trusty node health monitoring

Upstart monitors the docker, kubelet, and kube-proxy processes.
This change adds an upstart job, running as a daemon, that performs
non-PID health monitoring.
pull/6/head
Andy Zheng 2015-09-14 14:14:56 -07:00
parent f03a267089
commit 7427387938
1 changed files with 40 additions and 0 deletions

View File

@@ -372,5 +372,45 @@ script
fi
end script
--===============6024533374511606659==
MIME-Version: 1.0
Content-Type: text/upstart-job; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="kube-node-health-monitoring.conf"
description "Kubernetes node health monitoring"

# Periodically probes docker, kubelet, and kube-proxy, and kills whichever
# one fails its probe. This job only kills; it never starts anything itself.
start on stopped kube-docker and started kube-proxy

respawn

script
  set -o nounset

  # Wait for a minute to let docker, kubelet, and kube-proxy processes finish initialization.
  # TODO(andyzheng0831): replace it with a more reliable method if possible.
  sleep 60

  # Pause, in seconds, between two rounds of probes.
  sleep_seconds=10
  # Upper bound, in seconds, on each individual probe.
  max_seconds=10

  # We simply kill the process when there is a failure. Another upstart job will automatically
  # restart the process.
  #
  # Upstart runs script sections with "sh -e", and pkill exits non-zero when no process
  # matches (e.g. the daemon is already gone). Without "|| true" that would abort this
  # monitor and, after respawn, force it through the 60-second warm-up again.
  while true; do
    # "docker version" has to reach the daemon, so a hang or an error means it is unhealthy.
    if ! timeout "${max_seconds}" docker version > /dev/null; then
      echo "Docker daemon failed!"
      pkill docker || true
    fi

    # -f turns HTTP error statuses into a non-zero exit; -m caps the total request time.
    if ! curl -m "${max_seconds}" -f -s http://127.0.0.1:10255/healthz > /dev/null; then
      echo "Kubelet is unhealthy!"
      pkill kubelet || true
    fi

    if ! curl -m "${max_seconds}" -f -s http://127.0.0.1:10249/healthz > /dev/null; then
      echo "Kube-proxy is unhealthy!"
      pkill kube-proxy || true
    fi

    sleep "${sleep_seconds}"
  done
end script
--===============6024533374511606659==--