2014-07-29 04:42:53 +00:00
#!/bin/bash
2015-05-01 16:19:44 +00:00
# Copyright 2014 The Kubernetes Authors All rights reserved.
2014-07-29 04:42:53 +00:00
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
2015-04-21 20:30:16 +00:00
# Validates that the cluster is healthy.
2014-07-29 04:42:53 +00:00
2014-10-06 20:25:27 +00:00
# Strict mode: abort on any unhandled command failure, treat expansion of an
# unset variable as an error, and make a pipeline fail if any stage fails.
set -o errexit -o nounset -o pipefail
2014-07-29 04:42:53 +00:00
2014-10-03 21:58:49 +00:00
# Resolve the repository root as the parent of the directory that contains
# this script, then load the cluster environment and shared helpers.
# NOTE(review): NUM_MINIONS, REGISTER_MASTER_KUBELET and the color_* variables
# used later in this script are presumably provided by these files - confirm.
KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
source "${KUBE_ROOT}/cluster/kube-env.sh"
source "${KUBE_ROOT}/cluster/kube-util.sh"
2014-07-29 04:42:53 +00:00
2015-08-04 18:14:46 +00:00
# Number of nodes that must be registered and Ready for validation to pass.
# Starts from NUM_MINIONS; one more node is expected when
# REGISTER_MASTER_KUBELET is exactly "true" (unset is treated as not "true").
EXPECTED_NUM_NODES="${NUM_MINIONS}"
if [[ "${REGISTER_MASTER_KUBELET:-}" == "true" ]]; then
  EXPECTED_NUM_NODES=$((EXPECTED_NUM_NODES+1))
fi
2015-01-14 00:03:30 +00:00
# Wait until the expected number of nodes is registered and Ready.
# Make several attempts to deal with slow cluster birth.
# Exits with status 2 if the cluster does not converge in time.
attempt=0
while true; do
  # The "kubectl get nodes -o template" exports node information as a
  # comma-separated list of "<type>:<status>" entries, one per node, built
  # from the first condition of each node.
  #
  # Gather 2 counts from that output:
  #  - Total number of nodes (entries containing "Ready:").
  #  - Number of "ready" nodes (entries containing "Ready:True").
  #
  # A failing kubectl call is tolerated (|| true) because during cluster
  # bootstrapping for clusters where the master node is registered, the
  # apiserver will become available and then get restarted as the kubelet
  # configures the docker bridge. Only the exit status is ignored; kubectl's
  # stderr is still shown.
  nodes_status=$("${KUBE_ROOT}/cluster/kubectl.sh" get nodes -o template --template='{{range .items}}{{with index .status.conditions 0}}{{.type}}:{{.status}},{{end}}{{end}}' --api-version=v1) || true
  # grep -c prints 0 and exits non-zero when nothing matches; "|| true" keeps
  # errexit/pipefail from aborting the script in that case.
  found=$(echo "${nodes_status}" | tr "," "\n" | grep -c 'Ready:') || true
  ready=$(echo "${nodes_status}" | tr "," "\n" | grep -c 'Ready:True') || true

  if (( "${found}" == "${EXPECTED_NUM_NODES}" )) && (( "${ready}" == "${EXPECTED_NUM_NODES}" )); then
    break
  else
    # Set the timeout to ~10minutes (40 x 15 second) to avoid timeouts for 100-node clusters.
    if (( attempt > 40 )); then
      echo -e "${color_red}Detected ${ready} ready nodes, found ${found} nodes out of expected ${EXPECTED_NUM_NODES}. Your cluster may not be working.${color_norm}"
      # Best-effort diagnostic listing; it must not preempt the exit code
      # below if kubectl itself fails under errexit.
      "${KUBE_ROOT}/cluster/kubectl.sh" get nodes || true
      exit 2
    else
      echo -e "${color_yellow}Waiting for ${EXPECTED_NUM_NODES} ready nodes. ${ready} ready nodes, ${found} registered. Retrying.${color_norm}"
    fi
    attempt=$((attempt+1))
    sleep 15
  fi
done
echo "Found ${found} node(s)."
"${KUBE_ROOT}/cluster/kubectl.sh" get nodes
2014-07-29 04:42:53 +00:00
2015-04-21 20:30:16 +00:00
# Wait until every reported component status is healthy.
# Retries up to 5 times, 30 seconds apart, then exits with status 1.
attempt=0
while true; do
  # The "kubectl componentstatuses -o template" exports components health
  # information as a comma-separated list of "<type>:<status>" entries, one
  # per component, built from the first condition of each component.
  #
  # Gather 2 counts from that output:
  #  - Total number of componentstatuses (entries containing "Healthy:").
  #  - Number of "healthy" components (entries containing "Healthy:True").
  #
  # A failing kubectl call is tolerated (|| true) so that it is retried
  # below instead of aborting the script under errexit.
  cs_status=$("${KUBE_ROOT}/cluster/kubectl.sh" get componentstatuses -o template --template='{{range .items}}{{with index .conditions 0}}{{.type}}:{{.status}},{{end}}{{end}}' --api-version=v1) || true
  # grep -c prints 0 and exits non-zero when nothing matches; "|| true" keeps
  # errexit/pipefail from aborting the script in that case.
  componentstatuses=$(echo "${cs_status}" | tr "," "\n" | grep -c 'Healthy:') || true
  healthy=$(echo "${cs_status}" | tr "," "\n" | grep -c 'Healthy:True') || true

  # Zero reported components means the query failed or returned nothing, so
  # nothing was actually validated; treat that as "not working yet" rather
  # than as success.
  if ((componentstatuses > healthy)) || ((componentstatuses == 0)); then
    if ((attempt < 5)); then
      echo -e "${color_yellow}Cluster not working yet.${color_norm}"
      attempt=$((attempt+1))
      sleep 30
    else
      echo -e "${color_yellow}Validate output:${color_norm}"
      # Best-effort diagnostic listing; it must not preempt the error
      # message and exit code below if kubectl itself fails under errexit.
      "${KUBE_ROOT}/cluster/kubectl.sh" get cs || true
      echo -e "${color_red}Validation returned one or more failed components. Cluster is probably broken.${color_norm}"
      exit 1
    fi
  else
    break
  fi
done
echo "Validate output:"
"${KUBE_ROOT}/cluster/kubectl.sh" get cs
echo -e "${color_green}Cluster validation succeeded${color_norm}"