mirror of https://github.com/k3s-io/k3s
Merge pull request #60633 from pohly/local-up-cluster-enhancements
Automatic merge from submit-queue (batch tested with PRs 62445, 62768, 60633). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Local up cluster enhancements

**What this PR does / why we need it**:
Networking in a cluster brought up by hack/local-up-cluster.sh was broken because kube-proxy failed to start: connectivity to the Internet was okay, but anything involving communication among services (for example, starting cluster DNS) failed. Debugging this took a while because there was no indication in the output of hack/local-up-cluster.sh that something had gone wrong. There were also some harmless "no such process" errors during cleanup.

The kube-proxy startup issue was fixed in parallel by PR #60431, but that fix turned out to be incomplete, so this PR uses a different approach, suppresses the harmless error messages (caused by a race), and also makes it more obvious when processes die by checking them regularly.

**Special notes for your reviewer**:
I used `KUBE_ENABLE_CLUSTER_DNS=true NET_PLUGIN=kubenet hack/local-up-cluster.sh -O` to start the cluster. Check the kube-dns pod and its logs to see how kube-dns fails to communicate with the API server.

I've left out health checking of the kubelet when it runs under Docker because that isn't something I have used myself yet and I wanted to send these enhancements without further delay. I can try to add that later once you agree that such a health check is useful.

**Release note**:
```release-note
fix network setup in hack/local-up-cluster.sh (https://github.com/kubernetes/kubernetes/pull/60431)
better error diagnostics in hack/local-up-cluster.sh output
```
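For readers who want the gist of the new periodic check without reading the diff below: each component's recorded PID is probed with `kill -0`, and a warning is printed once when the process has gone away. A minimal standalone sketch of that pattern follows; `SOME_PID`, `SOME_LOG`, and the dummy `sleep` process are placeholders for illustration, not names from the script.

```bash
#!/usr/bin/env bash
# Minimal sketch of the liveness-check pattern added by healthcheck().
# SOME_PID and SOME_LOG are illustrative placeholders, not the variables
# used by hack/local-up-cluster.sh.

sleep 30 &                      # stand-in for a cluster component
SOME_PID=$!
SOME_LOG=/tmp/some-component.log

for _ in $(seq 1 60); do
  sleep 1
  # kill -0 sends no signal; it only tests whether the process still exists.
  if [[ -n "${SOME_PID-}" ]] && ! kill -0 "${SOME_PID}" 2>/dev/null; then
    echo "WARNING: component terminated unexpectedly, see ${SOME_LOG}" >&2
    SOME_PID=                   # clear the PID so the warning prints only once
  fi
done
```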
commit 1ee2ac07c1
hack/local-up-cluster.sh

```diff
@@ -356,27 +356,27 @@ cleanup()
 
   # Check if the API server is still running
   [[ -n "${APISERVER_PID-}" ]] && APISERVER_PIDS=$(pgrep -P ${APISERVER_PID} ; ps -o pid= -p ${APISERVER_PID})
-  [[ -n "${APISERVER_PIDS-}" ]] && sudo kill ${APISERVER_PIDS}
+  [[ -n "${APISERVER_PIDS-}" ]] && sudo kill ${APISERVER_PIDS} 2>/dev/null
 
   # Check if the controller-manager is still running
   [[ -n "${CTLRMGR_PID-}" ]] && CTLRMGR_PIDS=$(pgrep -P ${CTLRMGR_PID} ; ps -o pid= -p ${CTLRMGR_PID})
-  [[ -n "${CTLRMGR_PIDS-}" ]] && sudo kill ${CTLRMGR_PIDS}
+  [[ -n "${CTLRMGR_PIDS-}" ]] && sudo kill ${CTLRMGR_PIDS} 2>/dev/null
 
   if [[ -n "$DOCKERIZE_KUBELET" ]]; then
     cleanup_dockerized_kubelet
   else
     # Check if the kubelet is still running
     [[ -n "${KUBELET_PID-}" ]] && KUBELET_PIDS=$(pgrep -P ${KUBELET_PID} ; ps -o pid= -p ${KUBELET_PID})
-    [[ -n "${KUBELET_PIDS-}" ]] && sudo kill ${KUBELET_PIDS}
+    [[ -n "${KUBELET_PIDS-}" ]] && sudo kill ${KUBELET_PIDS} 2>/dev/null
   fi
 
   # Check if the proxy is still running
   [[ -n "${PROXY_PID-}" ]] && PROXY_PIDS=$(pgrep -P ${PROXY_PID} ; ps -o pid= -p ${PROXY_PID})
-  [[ -n "${PROXY_PIDS-}" ]] && sudo kill ${PROXY_PIDS}
+  [[ -n "${PROXY_PIDS-}" ]] && sudo kill ${PROXY_PIDS} 2>/dev/null
 
   # Check if the scheduler is still running
   [[ -n "${SCHEDULER_PID-}" ]] && SCHEDULER_PIDS=$(pgrep -P ${SCHEDULER_PID} ; ps -o pid= -p ${SCHEDULER_PID})
-  [[ -n "${SCHEDULER_PIDS-}" ]] && sudo kill ${SCHEDULER_PIDS}
+  [[ -n "${SCHEDULER_PIDS-}" ]] && sudo kill ${SCHEDULER_PIDS} 2>/dev/null
 
   # Check if the etcd is still running
   [[ -n "${ETCD_PID-}" ]] && kube::etcd::stop
@@ -386,6 +386,43 @@ cleanup()
   exit 0
 }
 
+# Check if all processes are still running. Prints a warning once each time
+# a process dies unexpectedly.
+function healthcheck {
+  if [[ -n "${APISERVER_PID-}" ]] && ! sudo kill -0 ${APISERVER_PID} 2>/dev/null; then
+    warning "API server terminated unexpectedly, see ${APISERVER_LOG}"
+    APISERVER_PID=
+  fi
+
+  if [[ -n "${CTLRMGR_PID-}" ]] && ! sudo kill -0 ${CTLRMGR_PID} 2>/dev/null; then
+    warning "kube-controller-manager terminated unexpectedly, see ${CTLRMGR_LOG}"
+    CTLRMGR_PID=
+  fi
+
+  if [[ -n "$DOCKERIZE_KUBELET" ]]; then
+    # TODO (https://github.com/kubernetes/kubernetes/issues/62474): check health also in this case
+    :
+  elif [[ -n "${KUBELET_PID-}" ]] && ! sudo kill -0 ${KUBELET_PID} 2>/dev/null; then
+    warning "kubelet terminated unexpectedly, see ${KUBELET_LOG}"
+    KUBELET_PID=
+  fi
+
+  if [[ -n "${PROXY_PID-}" ]] && ! sudo kill -0 ${PROXY_PID} 2>/dev/null; then
+    warning "kube-proxy terminated unexpectedly, see ${PROXY_LOG}"
+    PROXY_PID=
+  fi
+
+  if [[ -n "${SCHEDULER_PID-}" ]] && ! sudo kill -0 ${SCHEDULER_PID} 2>/dev/null; then
+    warning "scheduler terminated unexpectedly, see ${SCHEDULER_LOG}"
+    SCHEDULER_PID=
+  fi
+
+  if [[ -n "${ETCD_PID-}" ]] && ! sudo kill -0 ${ETCD_PID} 2>/dev/null; then
+    warning "etcd terminated unexpectedly"
+    ETCD_PID=
+  fi
+}
+
 function warning {
   message=$1
 
@@ -817,10 +854,18 @@ clientConnection:
 hostnameOverride: ${HOSTNAME_OVERRIDE}
 mode: ${KUBE_PROXY_MODE}
 EOF
+    if [[ -n ${FEATURE_GATES} ]]; then
+      echo "featureGates:"
+      # Convert from foo=true,bar=false to
+      # foo: true
+      # bar: false
+      for gate in $(echo ${FEATURE_GATES} | tr ',' ' '); do
+        echo $gate | sed -e 's/\(.*\)=\(.*\)/ \1: \2/'
+      done
+    fi >>/tmp/kube-proxy.yaml
 
     sudo "${GO_OUT}/hyperkube" proxy \
       --v=${LOG_LEVEL} \
-      --feature-gates="${FEATURE_GATES}" \
      --config=/tmp/kube-proxy.yaml \
       --master="https://${API_HOST}:${API_SECURE_PORT}" >"${PROXY_LOG}" 2>&1 &
     PROXY_PID=$!
@@ -1025,7 +1070,7 @@ fi
 print_success
 
 if [[ "${ENABLE_DAEMON}" = false ]]; then
-  while true; do sleep 1; done
+  while true; do sleep 1; healthcheck; done
 fi
 
 if [[ "${KUBETEST_IN_DOCKER:-}" == "true" ]]; then
```
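For reference, the featureGates hunk above converts the comma-separated `FEATURE_GATES` value into YAML map entries appended to /tmp/kube-proxy.yaml, which is why the separate `--feature-gates` flag can be dropped from the kube-proxy invocation. A small hypothetical run of that conversion (the gate names are invented for illustration):

```bash
# Hypothetical gate names, purely for illustration.
FEATURE_GATES="SomeAlphaFeature=true,AnotherFeature=false"

if [[ -n ${FEATURE_GATES} ]]; then
  echo "featureGates:"
  # Turn each "name=value" pair into an indented "name: value" YAML entry.
  for gate in $(echo ${FEATURE_GATES} | tr ',' ' '); do
    echo $gate | sed -e 's/\(.*\)=\(.*\)/ \1: \2/'
  done
fi
# Prints (and, in the script, appends to /tmp/kube-proxy.yaml):
# featureGates:
#  SomeAlphaFeature: true
#  AnotherFeature: false
```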