mirror of https://github.com/k3s-io/k3s
Merge pull request #60633 from pohly/local-up-cluster-enhancements
Automatic merge from submit-queue (batch tested with PRs 62445, 62768, 60633). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Local up cluster enhancements

**What this PR does / why we need it**:
Networking in a cluster brought up by hack/local-up-cluster.sh was broken because kube-proxy failed to start: connectivity to the Internet was okay, but anything involving communication among services (for example, starting cluster DNS) failed. Debugging this took a while because there was no indication in the output of hack/local-up-cluster.sh that something had gone wrong. There were also some harmless "no such process" errors during cleanup.

The kube-proxy startup issue was fixed in parallel by PR #60431, but that fix turned out to be incomplete, so this PR uses a different approach, suppresses the harmless error messages (caused by a race), and also makes it more obvious when processes die by checking them regularly.

**Special notes for your reviewer**:
I used `KUBE_ENABLE_CLUSTER_DNS=true NET_PLUGIN=kubenet hack/local-up-cluster.sh -O` to start the cluster. Check the kube-dns pod and its logs to see how kube-dns fails to communicate with the API server.

I've left out health checking of the kubelet when it runs under Docker because that isn't something I have used myself yet and I wanted to send these enhancements without further delay. I can try to add that later once you agree that such a health check is useful.

**Release note**:
```release-note
fix network setup in hack/local-up-cluster.sh (https://github.com/kubernetes/kubernetes/pull/60431)
better error diagnostics in hack/local-up-cluster.sh output
```
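For readers who want the gist of the new periodic check without reading the diff below: each component's recorded PID is probed with `kill -0`, and a warning is printed once when the process has gone away. A minimal standalone sketch of that pattern follows; `SOME_PID`, `SOME_LOG`, and the dummy `sleep` process are placeholders for illustration, not names from the script.

```bash
#!/usr/bin/env bash
# Minimal sketch of the liveness-check pattern added by healthcheck().
# SOME_PID and SOME_LOG are illustrative placeholders, not the variables
# used by hack/local-up-cluster.sh.

sleep 30 &                      # stand-in for a cluster component
SOME_PID=$!
SOME_LOG=/tmp/some-component.log

for _ in $(seq 1 60); do
  sleep 1
  # kill -0 sends no signal; it only tests whether the process still exists.
  if [[ -n "${SOME_PID-}" ]] && ! kill -0 "${SOME_PID}" 2>/dev/null; then
    echo "WARNING: component terminated unexpectedly, see ${SOME_LOG}" >&2
    SOME_PID=                   # clear the PID so the warning prints only once
  fi
done
```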
commit 1ee2ac07c1
hack/local-up-cluster.sh

```diff
@@ -356,27 +356,27 @@ cleanup()
 
   # Check if the API server is still running
   [[ -n "${APISERVER_PID-}" ]] && APISERVER_PIDS=$(pgrep -P ${APISERVER_PID} ; ps -o pid= -p ${APISERVER_PID})
-  [[ -n "${APISERVER_PIDS-}" ]] && sudo kill ${APISERVER_PIDS}
+  [[ -n "${APISERVER_PIDS-}" ]] && sudo kill ${APISERVER_PIDS} 2>/dev/null
 
   # Check if the controller-manager is still running
   [[ -n "${CTLRMGR_PID-}" ]] && CTLRMGR_PIDS=$(pgrep -P ${CTLRMGR_PID} ; ps -o pid= -p ${CTLRMGR_PID})
-  [[ -n "${CTLRMGR_PIDS-}" ]] && sudo kill ${CTLRMGR_PIDS}
+  [[ -n "${CTLRMGR_PIDS-}" ]] && sudo kill ${CTLRMGR_PIDS} 2>/dev/null
 
   if [[ -n "$DOCKERIZE_KUBELET" ]]; then
     cleanup_dockerized_kubelet
   else
     # Check if the kubelet is still running
     [[ -n "${KUBELET_PID-}" ]] && KUBELET_PIDS=$(pgrep -P ${KUBELET_PID} ; ps -o pid= -p ${KUBELET_PID})
-    [[ -n "${KUBELET_PIDS-}" ]] && sudo kill ${KUBELET_PIDS}
+    [[ -n "${KUBELET_PIDS-}" ]] && sudo kill ${KUBELET_PIDS} 2>/dev/null
   fi
 
   # Check if the proxy is still running
   [[ -n "${PROXY_PID-}" ]] && PROXY_PIDS=$(pgrep -P ${PROXY_PID} ; ps -o pid= -p ${PROXY_PID})
-  [[ -n "${PROXY_PIDS-}" ]] && sudo kill ${PROXY_PIDS}
+  [[ -n "${PROXY_PIDS-}" ]] && sudo kill ${PROXY_PIDS} 2>/dev/null
 
   # Check if the scheduler is still running
   [[ -n "${SCHEDULER_PID-}" ]] && SCHEDULER_PIDS=$(pgrep -P ${SCHEDULER_PID} ; ps -o pid= -p ${SCHEDULER_PID})
-  [[ -n "${SCHEDULER_PIDS-}" ]] && sudo kill ${SCHEDULER_PIDS}
+  [[ -n "${SCHEDULER_PIDS-}" ]] && sudo kill ${SCHEDULER_PIDS} 2>/dev/null
 
   # Check if the etcd is still running
   [[ -n "${ETCD_PID-}" ]] && kube::etcd::stop
@@ -386,6 +386,43 @@ cleanup()
   exit 0
 }
 
+# Check if all processes are still running. Prints a warning once each time
+# a process dies unexpectedly.
+function healthcheck {
+  if [[ -n "${APISERVER_PID-}" ]] && ! sudo kill -0 ${APISERVER_PID} 2>/dev/null; then
+    warning "API server terminated unexpectedly, see ${APISERVER_LOG}"
+    APISERVER_PID=
+  fi
+
+  if [[ -n "${CTLRMGR_PID-}" ]] && ! sudo kill -0 ${CTLRMGR_PID} 2>/dev/null; then
+    warning "kube-controller-manager terminated unexpectedly, see ${CTLRMGR_LOG}"
+    CTLRMGR_PID=
+  fi
+
+  if [[ -n "$DOCKERIZE_KUBELET" ]]; then
+    # TODO (https://github.com/kubernetes/kubernetes/issues/62474): check health also in this case
+    :
+  elif [[ -n "${KUBELET_PID-}" ]] && ! sudo kill -0 ${KUBELET_PID} 2>/dev/null; then
+    warning "kubelet terminated unexpectedly, see ${KUBELET_LOG}"
+    KUBELET_PID=
+  fi
+
+  if [[ -n "${PROXY_PID-}" ]] && ! sudo kill -0 ${PROXY_PID} 2>/dev/null; then
+    warning "kube-proxy terminated unexpectedly, see ${PROXY_LOG}"
+    PROXY_PID=
+  fi
+
+  if [[ -n "${SCHEDULER_PID-}" ]] && ! sudo kill -0 ${SCHEDULER_PID} 2>/dev/null; then
+    warning "scheduler terminated unexpectedly, see ${SCHEDULER_LOG}"
+    SCHEDULER_PID=
+  fi
+
+  if [[ -n "${ETCD_PID-}" ]] && ! sudo kill -0 ${ETCD_PID} 2>/dev/null; then
+    warning "etcd terminated unexpectedly"
+    ETCD_PID=
+  fi
+}
+
 function warning {
   message=$1
 
@@ -817,10 +854,18 @@ clientConnection:
 hostnameOverride: ${HOSTNAME_OVERRIDE}
 mode: ${KUBE_PROXY_MODE}
 EOF
+    if [[ -n ${FEATURE_GATES} ]]; then
+      echo "featureGates:"
+      # Convert from foo=true,bar=false to
+      # foo: true
+      # bar: false
+      for gate in $(echo ${FEATURE_GATES} | tr ',' ' '); do
+        echo $gate | sed -e 's/\(.*\)=\(.*\)/ \1: \2/'
+      done
+    fi >>/tmp/kube-proxy.yaml
 
     sudo "${GO_OUT}/hyperkube" proxy \
       --v=${LOG_LEVEL} \
-      --feature-gates="${FEATURE_GATES}" \
      --config=/tmp/kube-proxy.yaml \
       --master="https://${API_HOST}:${API_SECURE_PORT}" >"${PROXY_LOG}" 2>&1 &
     PROXY_PID=$!
@@ -1025,7 +1070,7 @@ fi
 print_success
 
 if [[ "${ENABLE_DAEMON}" = false ]]; then
-  while true; do sleep 1; done
+  while true; do sleep 1; healthcheck; done
 fi
 
 if [[ "${KUBETEST_IN_DOCKER:-}" == "true" ]]; then
```
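For reference, the featureGates hunk above converts the comma-separated `FEATURE_GATES` value into YAML map entries appended to /tmp/kube-proxy.yaml, which is why the separate `--feature-gates` flag can be dropped from the kube-proxy invocation. A small hypothetical run of that conversion (the gate names are invented for illustration):

```bash
# Hypothetical gate names, purely for illustration.
FEATURE_GATES="SomeAlphaFeature=true,AnotherFeature=false"

if [[ -n ${FEATURE_GATES} ]]; then
  echo "featureGates:"
  # Turn each "name=value" pair into an indented "name: value" YAML entry.
  for gate in $(echo ${FEATURE_GATES} | tr ',' ' '); do
    echo $gate | sed -e 's/\(.*\)=\(.*\)/ \1: \2/'
  done
fi
# Prints (and, in the script, appends to /tmp/kube-proxy.yaml):
# featureGates:
#  SomeAlphaFeature: true
#  AnotherFeature: false
```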