From 426e4820bcd2f7a85c348b6715ee47d6f0d1f2ee Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Thu, 1 Mar 2018 09:52:40 +0100
Subject: [PATCH 1/3] local-up-cluster: avoid "No such process" messages when cleaning up

Killing processes in cleanup is racing with those processes
terminating by themselves when aborting local-up-cluster.sh with
CTRL-C. That leads to unnecessary error messages:

    ^CCleaning up...
    kill: (21592): No such process
    kill: (21586): No such process
---
 hack/local-up-cluster.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/hack/local-up-cluster.sh b/hack/local-up-cluster.sh
index f0243194ae..c067ccd4fa 100755
--- a/hack/local-up-cluster.sh
+++ b/hack/local-up-cluster.sh
@@ -356,27 +356,27 @@ cleanup()
 
   # Check if the API server is still running
   [[ -n "${APISERVER_PID-}" ]] && APISERVER_PIDS=$(pgrep -P ${APISERVER_PID} ; ps -o pid= -p ${APISERVER_PID})
-  [[ -n "${APISERVER_PIDS-}" ]] && sudo kill ${APISERVER_PIDS}
+  [[ -n "${APISERVER_PIDS-}" ]] && sudo kill ${APISERVER_PIDS} 2>/dev/null
 
   # Check if the controller-manager is still running
   [[ -n "${CTLRMGR_PID-}" ]] && CTLRMGR_PIDS=$(pgrep -P ${CTLRMGR_PID} ; ps -o pid= -p ${CTLRMGR_PID})
-  [[ -n "${CTLRMGR_PIDS-}" ]] && sudo kill ${CTLRMGR_PIDS}
+  [[ -n "${CTLRMGR_PIDS-}" ]] && sudo kill ${CTLRMGR_PIDS} 2>/dev/null
 
   if [[ -n "$DOCKERIZE_KUBELET" ]]; then
     cleanup_dockerized_kubelet
   else
     # Check if the kubelet is still running
     [[ -n "${KUBELET_PID-}" ]] && KUBELET_PIDS=$(pgrep -P ${KUBELET_PID} ; ps -o pid= -p ${KUBELET_PID})
-    [[ -n "${KUBELET_PIDS-}" ]] && sudo kill ${KUBELET_PIDS}
+    [[ -n "${KUBELET_PIDS-}" ]] && sudo kill ${KUBELET_PIDS} 2>/dev/null
   fi
 
   # Check if the proxy is still running
   [[ -n "${PROXY_PID-}" ]] && PROXY_PIDS=$(pgrep -P ${PROXY_PID} ; ps -o pid= -p ${PROXY_PID})
-  [[ -n "${PROXY_PIDS-}" ]] && sudo kill ${PROXY_PIDS}
+  [[ -n "${PROXY_PIDS-}" ]] && sudo kill ${PROXY_PIDS} 2>/dev/null
 
   # Check if the scheduler is still running
   [[ -n "${SCHEDULER_PID-}" ]] && SCHEDULER_PIDS=$(pgrep -P ${SCHEDULER_PID} ; ps -o pid= -p ${SCHEDULER_PID})
-  [[ -n "${SCHEDULER_PIDS-}" ]] && sudo kill ${SCHEDULER_PIDS}
+  [[ -n "${SCHEDULER_PIDS-}" ]] && sudo kill ${SCHEDULER_PIDS} 2>/dev/null
 
   # Check if the etcd is still running
   [[ -n "${ETCD_PID-}" ]] && kube::etcd::stop

From 9cf35f355b0bd955e50e137a65277efd2d2bce2d Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Thu, 1 Mar 2018 09:56:20 +0100
Subject: [PATCH 2/3] local-up-cluster: warn about failing processes

This helps developers detect when something went wrong and then also
points them in the right direction by showing where the corresponding
log file is, if there is one.

For example, since commit 43cb024402e6 in April 2017 kube-proxy failed
to start because featureGates was not set correctly in the .yaml file,
but no-one seems to have noticed this.
---
 hack/local-up-cluster.sh | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/hack/local-up-cluster.sh b/hack/local-up-cluster.sh
index c067ccd4fa..35c4398072 100755
--- a/hack/local-up-cluster.sh
+++ b/hack/local-up-cluster.sh
@@ -386,6 +386,43 @@ cleanup()
   exit 0
 }
 
+# Check if all processes are still running. Prints a warning once each time
+# a process dies unexpectedly.
+function healthcheck {
+  if [[ -n "${APISERVER_PID-}" ]] && ! sudo kill -0 ${APISERVER_PID} 2>/dev/null; then
+    warning "API server terminated unexpectedly, see ${APISERVER_LOG}"
+    APISERVER_PID=
+  fi
+
+  if [[ -n "${CTLRMGR_PID-}" ]] && ! sudo kill -0 ${CTLRMGR_PID} 2>/dev/null; then
+    warning "kube-controller-manager terminated unexpectedly, see ${CTLRMGR_LOG}"
+    CTLRMGR_PID=
+  fi
+
+  if [[ -n "$DOCKERIZE_KUBELET" ]]; then
+    # TODO (https://github.com/kubernetes/kubernetes/issues/62474): check health also in this case
+    :
+  elif [[ -n "${KUBELET_PID-}" ]] && ! sudo kill -0 ${KUBELET_PID} 2>/dev/null; then
+    warning "kubelet terminated unexpectedly, see ${KUBELET_LOG}"
+    KUBELET_PID=
+  fi
+
+  if [[ -n "${PROXY_PID-}" ]] && ! sudo kill -0 ${PROXY_PID} 2>/dev/null; then
+    warning "kube-proxy terminated unexpectedly, see ${PROXY_LOG}"
+    PROXY_PID=
+  fi
+
+  if [[ -n "${SCHEDULER_PID-}" ]] && ! sudo kill -0 ${SCHEDULER_PID} 2>/dev/null; then
+    warning "scheduler terminated unexpectedly, see ${SCHEDULER_LOG}"
+    SCHEDULER_PID=
+  fi
+
+  if [[ -n "${ETCD_PID-}" ]] && ! sudo kill -0 ${ETCD_PID} 2>/dev/null; then
+    warning "etcd terminated unexpectedly"
+    ETCD_PID=
+  fi
+}
+
 function warning {
   message=$1
 
@@ -1008,7 +1045,7 @@ fi
 print_success
 
 if [[ "${ENABLE_DAEMON}" = false ]]; then
-  while true; do sleep 1; done
+  while true; do sleep 1; healthcheck; done
 fi
 
 if [[ "${KUBETEST_IN_DOCKER:-}" == "true" ]]; then

From 731c92896ba389f85bf3b4b91ff60e288e2b4239 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Thu, 1 Mar 2018 10:01:50 +0100
Subject: [PATCH 3/3] local-up-cluster: fix kube-proxy featureGates configuration

Commit 43cb024402e6 replaced command line parameters with a .yaml
configuration file. But feature gates must be configured with a map in
.yaml, not with a comma-separated string as in the command line
parameters. As a result, kube-proxy failed to start and networking was
broken in the cluster.

Commit c339fc0c4fad tried to fix that by moving feature gates back to
the command line, but later it was found out that the command line
parameter gets ignored when also specifying a config.

Therefore now the feature gates variable gets converted into a proper
map in the config.
---
 hack/local-up-cluster.sh | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/hack/local-up-cluster.sh b/hack/local-up-cluster.sh
index 35c4398072..d46e73c8cc 100755
--- a/hack/local-up-cluster.sh
+++ b/hack/local-up-cluster.sh
@@ -837,10 +837,18 @@ clientConnection:
 hostnameOverride: ${HOSTNAME_OVERRIDE}
 mode: ${KUBE_PROXY_MODE}
 EOF
+    if [[ -n ${FEATURE_GATES} ]]; then
+      echo "featureGates:"
+      # Convert from foo=true,bar=false to
+      # foo: true
+      # bar: false
+      for gate in $(echo ${FEATURE_GATES} | tr ',' ' '); do
+        echo $gate | sed -e 's/\(.*\)=\(.*\)/ \1: \2/'
+      done
+    fi >>/tmp/kube-proxy.yaml
 
     sudo "${GO_OUT}/hyperkube" proxy \
       --v=${LOG_LEVEL} \
-      --feature-gates="${FEATURE_GATES}" \
       --config=/tmp/kube-proxy.yaml \
       --master="https://${API_HOST}:${API_SECURE_PORT}" >"${PROXY_LOG}" 2>&1 &
     PROXY_PID=$!