#!/bin/bash

# Copyright 2014 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Verifies that services and virtual IPs work.

set -o errexit
set -o nounset
set -o pipefail

KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..

: ${KUBE_VERSION_ROOT:=${KUBE_ROOT}}
: ${KUBECTL:="${KUBE_VERSION_ROOT}/cluster/kubectl.sh"}
: ${KUBE_CONFIG_FILE:="config-test.sh"}

export KUBECTL KUBE_CONFIG_FILE

TEST_NAMESPACE="services-test-${RANDOM}"
KUBECTL="${KUBECTL} --namespace=${TEST_NAMESPACE}"

source "${KUBE_ROOT}/cluster/kube-env.sh"
source "${KUBE_VERSION_ROOT}/cluster/${KUBERNETES_PROVIDER}/util.sh"

prepare-e2e

function error() {
  echo "$@" >&2
  exit 1
}

function sort_args() {
  [ $# == 0 ] && return
  a=($(printf "%s\n" "$@" | sort -n))
  echo "${a[*]}"
}

# Join args $2... with $1 between them.
# Example: join ", " x y z => x, y, z
function join() {
  local sep item
  sep=$1
  shift
  echo -n "${1:-}"
  shift
  for item; do
    echo -n "${sep}${item}"
  done
  echo
}

svcs_to_clean=()
function do_teardown() {
  ${KUBECTL} delete namespace "${TEST_NAMESPACE}"
}

function make_namespace() {
  echo "Making namespace '${TEST_NAMESPACE}'"
  ${KUBECTL} create -f - << __EOF__
{
  "kind": "Namespace",
  "apiVersion": "v1",
  "metadata": {
    "name": "${TEST_NAMESPACE}"
  }
}
__EOF__
}

wait_for_apiserver() {
  echo "Waiting for apiserver to be up"
  local i
  for i in $(seq 1 12); do
    results=$(ssh-to-node "${master}" "
        wget -q -T 1 -O - http://localhost:8080/healthz || true
        ")
    if [[ "${results}" == "ok" ]]; then
      return
    fi
    sleep 5  # wait for apiserver to restart
  done
  error "restarting apiserver timed out"
}

# Args:
#   $1: service name
#   $2: service port
#   $3: service replica count
function start_service() {
  echo "Starting service '${TEST_NAMESPACE}/$1' on port $2 with $3 replicas"
  svcs_to_clean+=("$1")
  ${KUBECTL} create -f - << __EOF__
{
  "kind": "ReplicationController",
  "apiVersion": "v1",
  "metadata": {
    "name": "$1",
    "labels": {
      "name": "$1"
    }
  },
  "spec": {
    "replicas": $3,
    "selector": {
      "name": "$1"
    },
    "template": {
      "metadata": {
        "labels": {
          "name": "$1"
        }
      },
      "spec": {
        "containers": [
          {
            "name": "$1",
            "image": "gcr.io/google_containers/serve_hostname:1.1",
            "ports": [
              {
                "containerPort": 9376,
                "protocol": "TCP"
              }
            ]
          }
        ]
      }
    }
  }
}
__EOF__
  ${KUBECTL} create -f - << __EOF__
{
  "kind": "Service",
  "apiVersion": "v1",
  "metadata": {
    "name": "$1",
    "labels": {
      "name": "$1"
    }
  },
  "spec": {
    "ports": [
      {
        "protocol": "TCP",
        "port": $2,
        "targetPort": 9376
      }
    ],
    "selector": {
      "name": "$1"
    }
  }
}
__EOF__
}

# Args:
#   $1: service name
function stop_service() {
  echo "Stopping service '$1'"
  ${KUBECTL} stop rc "$1" || true
  ${KUBECTL} delete services "$1" || true
}

# Args:
#   $1: service name
#   $2: expected pod count
function query_pods() {
  # This fails very occasionally, so retry a bit.
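  # The go-template below prints one space-separated name per matching pod,
  # e.g. "service1-0133o service1-h4210 service1-tf4o3" (hypothetical names),
  # so word-splitting the output yields one array element per pod.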
  local pods_unsorted=()
  local i
  for i in $(seq 1 10); do
    pods_unsorted=($(${KUBECTL} get pods -o template \
        '--template={{range.items}}{{.metadata.name}} {{end}}' \
        '--api-version=v1' \
        -l name="$1"))
    found="${#pods_unsorted[*]}"
    if [[ "${found}" == "$2" ]]; then
      break
    fi
    sleep 3
  done
  if [[ "${found}" != "$2" ]]; then
    error "Failed to query pods for $1: expected $2, found ${found}"
  fi

  # The "return" is a sorted list of pod IDs.
  sort_args "${pods_unsorted[@]}"
}

# Args:
#   $1: service name
#   $2: pod count
function wait_for_pods() {
  echo "Querying pods in $1"
  local pods_sorted=$(query_pods "$1" "$2")
  printf '\t%s\n' ${pods_sorted}

  # Container startup on a clean cluster can take a while, mostly due to the
  # docker image pulls.  Wait a generous amount of time.
  # TODO: Sometimes pods change underneath us, which makes the GET fail (404).
  # Maybe this test can be loosened and still be useful?
  pods_needed=$2
  local i
  for i in $(seq 1 30); do
    echo "Waiting for ${pods_needed} pods to become 'running'"
    pods_needed="$2"
    for id in ${pods_sorted}; do
      status=$(${KUBECTL} get pods "${id}" -o template --template='{{.status.phase}}' --api-version=v1)
      if [[ "${status}" == "Running" ]]; then
        pods_needed=$((pods_needed-1))
      fi
    done
    if [[ "${pods_needed}" == 0 ]]; then
      break
    fi
    sleep 3
  done
  if [[ "${pods_needed}" -gt 0 ]]; then
    error "Pods for $1 did not come up in time"
  fi
}

# Args:
#   $1: service name
#   $2: service IP
#   $3: service port
#   $4: pod count
#   $5: pod IDs (sorted)
function wait_for_service_up() {
  local i
  local found_pods
  echo "waiting for $1 at $2:$3"
  # TODO: Reduce this interval once we have a sense for the latency distribution.
  for i in $(seq 1 10); do
    results=($(ssh-to-node "${test_node}" "
        set -e;
        for i in $(seq -s' ' 1 $(($4*3))); do
          wget -q -T 1 -O - http://$2:$3 || true;
          echo;
        done | sort -n | uniq
        "))
    found_pods=$(sort_args "${results[@]:+${results[@]}}")
    if [[ "${found_pods}" == "$5" ]]; then
      return
    fi
    echo "expected '$5', got '${found_pods}': will try again"
    sleep 5  # wait for endpoints to propagate
  done
  error "$1: failed to verify portal from host"
}

# Args:
#   $1: service name
#   $2: service IP
#   $3: service port
function wait_for_service_down() {
  local i
  for i in $(seq 1 15); do
    $(ssh-to-node "${test_node}" "
        curl -s --connect-timeout 2 "http://$2:$3" >/dev/null 2>&1 && exit 1 || exit 0;
        ") && break
    echo "Waiting for $1 to go down"
    sleep 2
  done
}

# Args:
#   $1: service name
#   $2: service IP
#   $3: service port
#   $4: pod count
#   $5: pod IDs (sorted)
function verify_from_container() {
  local i
  local found_pods
  echo "verifying $1 at $2:$3 from a container"
  # TODO: Reduce this interval once we have a sense for the latency distribution.
  for i in $(seq 1 10); do
    results=($(ssh-to-node "${test_node}" "
        set -e;
        sudo docker pull gcr.io/google_containers/busybox >/dev/null;
        sudo docker run gcr.io/google_containers/busybox sh -c '
            for i in $(seq -s' ' 1 $(($4*3))); do
              wget -q -T 1 -O - http://$2:$3 || true;
              echo;
            done
        '" | sort -n | uniq))
    found_pods=$(sort_args "${results[@]:+${results[@]}}")
    if [[ "${found_pods}" == "$5" ]]; then
      return
    fi
    echo "expected '$5', got '${found_pods}': will try again"
    sleep 5  # wait for endpoints to propagate
  done
  error "$1: failed to verify portal from container"
}

trap do_teardown EXIT

# Get node IP addresses and pick one as our test point.
detect-minions
test_node="${MINION_NAMES[0]}"
master="${MASTER_NAME}"

# Make our namespace
make_namespace

# Launch some pods and services.
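# (Each pod runs serve_hostname, which answers HTTP GETs on port 9376 with its
# own pod name; wait_for_service_up and verify_from_container rely on this to
# tell which backend answered through the VIP.)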
svc1_name="service1" svc1_port=80 svc1_count=3 start_service "${svc1_name}" "${svc1_port}" "${svc1_count}" svc2_name="service2" svc2_port=80 svc2_count=3 start_service "${svc2_name}" "${svc2_port}" "${svc2_count}" # Wait for the pods to become "running". wait_for_pods "${svc1_name}" "${svc1_count}" wait_for_pods "${svc2_name}" "${svc2_count}" # Get the sorted lists of pods. svc1_pods=$(query_pods "${svc1_name}" "${svc1_count}") svc2_pods=$(query_pods "${svc2_name}" "${svc2_count}") # Get the VIP IPs. svc1_ip=$(${KUBECTL} get services -o template '--template={{.spec.clusterIP}}' "${svc1_name}" --api-version=v1) test -n "${svc1_ip}" || error "Service1 IP is blank" svc2_ip=$(${KUBECTL} get services -o template '--template={{.spec.clusterIP}}' "${svc2_name}" --api-version=v1) test -n "${svc2_ip}" || error "Service2 IP is blank" if [[ "${svc1_ip}" == "${svc2_ip}" ]]; then error "VIPs conflict: ${svc1_ip}" fi # # Test 1: Prove that the service VIP is alive. # echo "Test 1: Prove that the service VIP is alive." echo "Verifying the VIP from the host" wait_for_service_up "${svc1_name}" "${svc1_ip}" "${svc1_port}" \ "${svc1_count}" "${svc1_pods}" wait_for_service_up "${svc2_name}" "${svc2_ip}" "${svc2_port}" \ "${svc2_count}" "${svc2_pods}" echo "Verifying the VIP from a container" verify_from_container "${svc1_name}" "${svc1_ip}" "${svc1_port}" \ "${svc1_count}" "${svc1_pods}" verify_from_container "${svc2_name}" "${svc2_ip}" "${svc2_port}" \ "${svc2_count}" "${svc2_pods}" # # Test 2: Bounce the proxy and make sure the VIP comes back. # echo "Test 2: Bounce the proxy and make sure the VIP comes back." echo "Restarting kube-proxy" restart-kube-proxy "${test_node}" echo "Verifying the VIP from the host" wait_for_service_up "${svc1_name}" "${svc1_ip}" "${svc1_port}" \ "${svc1_count}" "${svc1_pods}" wait_for_service_up "${svc2_name}" "${svc2_ip}" "${svc2_port}" \ "${svc2_count}" "${svc2_pods}" echo "Verifying the VIP from a container" verify_from_container "${svc1_name}" "${svc1_ip}" "${svc1_port}" \ "${svc1_count}" "${svc1_pods}" verify_from_container "${svc2_name}" "${svc2_ip}" "${svc2_port}" \ "${svc2_count}" "${svc2_pods}" # # Test 3: Stop one service and make sure it is gone. # echo "Test 3: Stop one service and make sure it is gone." stop_service "${svc1_name}" wait_for_service_down "${svc1_name}" "${svc1_ip}" "${svc1_port}" # # Test 4: Bring up another service. # TODO: Actually add a test to force re-use. # echo "Test 4: Bring up another service." svc3_name="service3" svc3_port=80 svc3_count=3 start_service "${svc3_name}" "${svc3_port}" "${svc3_count}" # Wait for the pods to become "running". wait_for_pods "${svc3_name}" "${svc3_count}" # Get the sorted lists of pods. svc3_pods=$(query_pods "${svc3_name}" "${svc3_count}") # Get the VIP. svc3_ip=$(${KUBECTL} get services -o template '--template={{.spec.clusterIP}}' "${svc3_name}" --api-version=v1) test -n "${svc3_ip}" || error "Service3 IP is blank" echo "Verifying the VIPs from the host" wait_for_service_up "${svc3_name}" "${svc3_ip}" "${svc3_port}" \ "${svc3_count}" "${svc3_pods}" echo "Verifying the VIPs from a container" verify_from_container "${svc3_name}" "${svc3_ip}" "${svc3_port}" \ "${svc3_count}" "${svc3_pods}" # # Test 5: Remove the iptables rules, make sure they come back. # echo "Test 5: Remove the iptables rules, make sure they come back." 
echo "Manually removing iptables rules" # Remove both the new and old style chains, in case we're testing on an old kubelet ssh-to-node "${test_node}" "sudo iptables -t nat -F KUBE-PORTALS-HOST || true" ssh-to-node "${test_node}" "sudo iptables -t nat -F KUBE-PORTALS-CONTAINER || true" echo "Verifying the VIPs from the host" wait_for_service_up "${svc3_name}" "${svc3_ip}" "${svc3_port}" \ "${svc3_count}" "${svc3_pods}" echo "Verifying the VIPs from a container" verify_from_container "${svc3_name}" "${svc3_ip}" "${svc3_port}" \ "${svc3_count}" "${svc3_pods}" # # Test 6: Restart the master, make sure VIPs come back. # echo "Test 6: Restart the master, make sure VIPs come back." echo "Restarting the master" restart-apiserver "${master}" wait_for_apiserver echo "Verifying the VIPs from the host" wait_for_service_up "${svc3_name}" "${svc3_ip}" "${svc3_port}" \ "${svc3_count}" "${svc3_pods}" echo "Verifying the VIPs from a container" verify_from_container "${svc3_name}" "${svc3_ip}" "${svc3_port}" \ "${svc3_count}" "${svc3_pods}" # # Test 7: Bring up another service, make sure it does not re-use IPs. # echo "Test 7: Bring up another service, make sure it does not re-use IPs." svc4_name="service4" svc4_port=80 svc4_count=3 start_service "${svc4_name}" "${svc4_port}" "${svc4_count}" # Wait for the pods to become "running". wait_for_pods "${svc4_name}" "${svc4_count}" # Get the sorted lists of pods. svc4_pods=$(query_pods "${svc4_name}" "${svc4_count}") # Get the VIP. svc4_ip=$(${KUBECTL} get services -o template '--template={{.spec.clusterIP}}' "${svc4_name}" --api-version=v1) test -n "${svc4_ip}" || error "Service4 IP is blank" if [[ "${svc4_ip}" == "${svc2_ip}" || "${svc4_ip}" == "${svc3_ip}" ]]; then error "VIPs conflict: ${svc4_ip}" fi echo "Verifying the VIPs from the host" wait_for_service_up "${svc4_name}" "${svc4_ip}" "${svc4_port}" \ "${svc4_count}" "${svc4_pods}" echo "Verifying the VIPs from a container" verify_from_container "${svc4_name}" "${svc4_ip}" "${svc4_port}" \ "${svc4_count}" "${svc4_pods}" exit 0