From 1c4e47fe2242e75b07902c36e39caecdca7586b6 Mon Sep 17 00:00:00 2001
From: Max Forbes
Date: Fri, 6 Mar 2015 13:07:47 -0500
Subject: [PATCH] Wait until all startup pods are 'Running' before running e2e tests.

---
 hack/e2e-internal/e2e-status.sh | 48 +++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/hack/e2e-internal/e2e-status.sh b/hack/e2e-internal/e2e-status.sh
index e21a391691..e61d375edc 100755
--- a/hack/e2e-internal/e2e-status.sh
+++ b/hack/e2e-internal/e2e-status.sh
@@ -14,6 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# e2e-status checks that the status of a cluster is acceptable for running
+# e2e tests.
 set -o errexit
 set -o nounset
 set -o pipefail
@@ -32,3 +34,49 @@ source "${KUBE_VERSION_ROOT}/cluster/${KUBERNETES_PROVIDER}/util.sh"
 prepare-e2e
 
 ${KUBECTL} version
+
+# Before running tests, ensure that all pods are 'Running'. Tests can timeout
+# and fail because the test pods don't run in time. The problem is that the pods
+# that a cluster runs on startup take too long to start running, with sequential
+# Docker pulls of large images being the culprit. These startup pods block the
+# test pods from running.
+
+# Settings:
+# timeout is in seconds; 1200 = 20 minutes.
+timeout=1200
+# pause is how many seconds to sleep between pod get calls.
+pause=5
+# min_pods is the minimum number of pods we require.
+min_pods=1
+
+# Check pod statuses.
+deadline=$(($(date '+%s')+${timeout}))
+echo "Waiting at most ${timeout} seconds for all pods to be 'Running'" >&2
+all_running=0
+until [[ ${all_running} == 1 ]]; do
+  if [[ "$(date '+%s')" -ge "${deadline}" ]]; then
+    echo "All pods never 'Running' in time." >&2
+    exit 1
+  fi
+  statuses=($(${KUBECTL} get pods --template='{{range.items}}{{.currentState.status}} {{end}}'))
+
+  # Ensure that we have enough pods.
+  echo "Found ${#statuses[@]} pods with statuses: ${statuses[@]}" >&2
+  if [[ ${#statuses[@]} -lt ${min_pods} ]]; then
+    continue
+  fi
+
+  # Then, ensure all pods found are 'Running'.
+  found_running=1
+  for status in "${statuses[@]}"; do
+    if [[ "${status}" != "Running" ]]; then
+      # If we find a pod that isn't 'Running', sleep here to avoid delaying
+      # other code paths (where all pods are 'Running').
+      found_running=0
+      sleep ${pause}
+      break
+    fi
+  done
+  all_running=${found_running}
+done
+echo "All pods are 'Running'" >&2
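
Note: the --template call above reads the pod phase from the pre-1.0 API field .currentState.status. As a rough sketch only (not part of the patch), the same wait loop against a newer kubectl, assuming the phase is exposed as .status.phase and that a plain `kubectl` binary is on the PATH, could look like the following; the 20-minute budget and 5-second pause mirror the settings in the patch:

#!/usr/bin/env bash
# Sketch, not part of the patch: poll until every pod reports phase 'Running',
# assuming a newer kubectl where the pod phase lives at .status.phase.
set -o errexit
set -o nounset
set -o pipefail

deadline=$(($(date '+%s') + 1200))  # same 20-minute budget as the patch
while true; do
  if [[ "$(date '+%s')" -ge "${deadline}" ]]; then
    echo "Pods never reached 'Running' in time." >&2
    exit 1
  fi
  # Flatten all pod phases into one space-separated line and split into an array.
  phases=($(kubectl get pods -o go-template='{{range .items}}{{.status.phase}} {{end}}'))
  if [[ ${#phases[@]} -ge 1 ]]; then
    all_running=1
    for phase in "${phases[@]}"; do
      if [[ "${phase}" != "Running" ]]; then
        all_running=0
        break
      fi
    done
    if [[ ${all_running} == 1 ]]; then
      break
    fi
  fi
  sleep 5  # same pause between polls as the patch
done
echo "All pods are 'Running'" >&2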