mirror of https://github.com/k3s-io/k3s
Merge pull request #23992 from mml/docker-checker
Automatic merge from submit-queue. Rewrite docker-checker.sh to make it less kill-happy. (pull/6/head)
commit
af1e1c3ce6
|
@ -18,38 +18,70 @@
|
|||
# it detects a failure. It then exits, and supervisord restarts it
|
||||
# which in turn restarts docker.
|
||||
|
||||
/etc/init.d/docker stop
|
||||
# Make sure docker gracefully terminated before start again
|
||||
starttime=`date +%s`
|
||||
while pidof docker > /dev/null; do
|
||||
currenttime=`date +%s`
|
||||
((elapsedtime = currenttime - starttime))
|
||||
# after 60 seconds, forcefully terminate docker process
|
||||
if test $elapsedtime -gt 60; then
|
||||
echo "attempting to kill docker process with sigkill signal"
|
||||
kill -9 `pidof docker` || sleep 10
|
||||
else
|
||||
echo "waiting clean shutdown"
|
||||
sleep 10
|
||||
fi
|
||||
done
|
||||
|
||||
echo "docker is not running. starting docker"
|
||||
|
||||
# cleanup docker network checkpoint to avoid running into known issue
|
||||
# of docker (https://github.com/docker/docker/issues/18283)
|
||||
rm -rf /var/lib/docker/network
|
||||
|
||||
/etc/init.d/docker start
|
||||
|
||||
echo "waiting 30s for startup"
|
||||
sleep 30
|
||||
|
||||
while true; do
|
||||
if ! timeout 60 docker ps > /dev/null; then
|
||||
echo "Docker failed!"
|
||||
exit 2
|
||||
# Entry point of the docker health checker.
#
# If docker does not report healthy within 60 seconds, it is restarted once
# (stop_docker followed by start_docker), given 30 seconds to come up, and
# probed again. After that, docker is polled every 10 seconds for as long as
# it keeps answering. The first failed poll prints "Docker failed!" and the
# script exits with status 2.
#
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   never returns normally; exits the script with status 2
main() {
  if ! healthy 60; then
    stop_docker
    start_docker
    echo "waiting 30s for startup"
    sleep 30
    # The result of this probe is not checked here; the polling loop below
    # decides whether docker actually came back.
    healthy 60
  fi

  # Single-attempt probes: leave the loop on the first failed `docker ps`.
  while healthy; do
    sleep 10
  done

  echo "Docker failed!"
  exit 2
}
|
||||
|
||||
# Performs a health check on docker by running `docker ps`; every attempt is
# capped at 60 seconds by `timeout`.
#
# Arguments:
#   $1 - optional; number of seconds to keep retrying for a healthy result.
#        When omitted or empty, only one attempt is made.
# Outputs:
#   Progress messages on stdout.
# Returns:
#   0 as soon as `docker ps` succeeds, 2 if it did not succeed within the
#   allowed retry window.
healthy() {
  # ${1:-} rather than "$1" followed by `shift`: being called without an
  # argument is the documented single-attempt mode, and a bare `shift` fails
  # when there is nothing to shift (fatal under `set -e`, and "$1" is fatal
  # under `set -u`).
  local max_retry_sec="${1:-}"
  local start_time
  start_time=$(date +%s)

  while ! timeout 60 docker ps > /dev/null; do
    # Give up immediately when no retry window was requested, or once the
    # elapsed time (which includes the failed attempts) exceeds the window.
    if [[ -z "${max_retry_sec}" ]] \
      || (( $(date +%s) - start_time > max_retry_sec )); then
      echo "docker ps did not succeed"
      return 2
    fi
    echo "waiting 5s before retry"
    sleep 5
  done

  echo "docker is healthy"
  return 0
}
|
||||
|
||||
# Stops docker through its init script and blocks until `pidof docker` no
# longer reports a process.
#
# While docker is still running, the function waits in 10 second steps. Once
# more than 60 seconds have passed since the stop request, the remaining
# docker processes are sent SIGKILL on every iteration until they are gone.
#
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   status of the final (failing) `pidof` probe that ended the loop
stop_docker() {
  # Keep all bookkeeping local so nothing clobbers same-named globals.
  local start_time elapsed pids

  /etc/init.d/docker stop

  # Make sure docker terminated before it is started again.
  start_time=$(date +%s)
  # Capture the PID list once per iteration so that the liveness check and
  # the kill below act on the same set of processes.
  while pids=$(pidof docker); do
    elapsed=$(( $(date +%s) - start_time ))
    # After 60 seconds, forcefully terminate the docker process.
    if (( elapsed > 60 )); then
      echo "attempting to kill docker process with sigkill signal"
      # Unquoted on purpose: pidof prints a space-separated list of PIDs.
      # shellcheck disable=SC2086
      if kill -9 ${pids}; then
        # Short pause so the loop does not spin while the process is reaped.
        sleep 1
      else
        sleep 10
      fi
    else
      echo "waiting clean shutdown"
      sleep 10
    fi
  done
}
|
||||
|
||||
# Starts docker through its init script.
#
# Before starting, the directory /var/lib/docker/network is removed to avoid a
# known docker issue with the network checkpoint
# (https://github.com/docker/docker/issues/18283).
#
# Arguments: none
# Outputs:   one progress message on stdout
# Returns:   exit status of `/etc/init.d/docker start`
start_docker() {
  local network_checkpoint=/var/lib/docker/network

  printf '%s\n' "docker is not running. starting docker"

  # Clear the stale network checkpoint before the daemon comes up.
  rm -rf "${network_checkpoint}"

  /etc/init.d/docker start
}
|
||||
|
||||
main
|
||||
|
|
Loading…
Reference in New Issue