diff --git a/Makefile b/Makefile index 9ff61037b2..884c0ca972 100644 --- a/Makefile +++ b/Makefile @@ -93,16 +93,28 @@ test_e2e: # Build and run node end-to-end tests. # # Args: -# FOCUS: regexp that matches the tests to be run -# SKIP: regexp that matches the tests that needs to be skipped +# FOCUS: regexp that matches the tests to be run. Defaults to "". +# SKIP: regexp that matches the tests that need to be skipped. Defaults to "". +# RUN_UNTIL_FAILURE: If true, pass --untilItFails to ginkgo so tests are run repeatedly until they fail. Defaults to false. +# REMOTE: If true, run the tests on a remote host instance on GCE. Defaults to false. +# IMAGES: for REMOTE=true only. Comma delimited list of images for creating remote hosts to run tests against. Defaults to "e2e-node-containervm-v20160321-image". +# LIST_IMAGES: If true, don't run tests. Just output the list of available images for testing. Defaults to false. +# HOSTS: for REMOTE=true only. Comma delimited list of running gce hosts to run tests against. Defaults to "". +# DELETE_INSTANCES: for REMOTE=true only. Delete any instances created as part of this test run. Defaults to false. +# ARTIFACTS: for REMOTE=true only. Local directory to scp test artifacts into from the remote hosts. Defaults to "/tmp/_artifacts". +# REPORT: for REMOTE=false only. Local directory to write junit xml results to. Defaults to "/tmp/". +# CLEANUP: for REMOTE=true only. If false, do not stop processes or delete test files on remote hosts. Defaults to true. +# IMAGE_PROJECT: for REMOTE=true only. Project containing images provided to IMAGES. Defaults to "kubernetes-node-e2e-images". +# INSTANCE_PREFIX: for REMOTE=true only. Instances created from images will have the name "${INSTANCE_PREFIX}-${IMAGE_NAME}". Defaults to "test". +# # Example: # make test_e2e_node FOCUS=kubelet SKIP=container +# make test_e2e_node REMOTE=true DELETE_INSTANCES=true # Build and run tests. 
test_e2e_node: - hack/e2e-node-test.sh FOCUS=$(FOCUS) SKIP=$(SKIP) + hack/e2e-node-test.sh .PHONY: test_e2e_node - # Remove all build artifacts. # # Example: diff --git a/docs/devel/e2e-node-tests.md b/docs/devel/e2e-node-tests.md index d2634aa976..f286913433 100644 --- a/docs/devel/e2e-node-tests.md +++ b/docs/devel/e2e-node-tests.md @@ -34,147 +34,187 @@ Documentation for other releases can be found at # Node End-To-End tests -Node e2e tests start kubelet and minimal supporting infrastructure to validate -the kubelet on a host. Tests can be run either locally, against a remote host or -against a GCE image. +Node e2e tests are component tests meant for testing the Kubelet code on a custom host environment. + +Tests can be run either locally or against a host running on GCE. + +Node e2e tests are run as both pre- and post- submit tests by the Kubernetes project. *Note: Linux only. Mac and Windows unsupported.* -## Running tests locally +# Running tests -etcd must be installed and on the PATH to run the node e2e tests. To verify -etcd is installed: `which etcd`. You can find instructions for installing etcd -[on the etcd releases page](https://github.com/coreos/etcd/releases). +## Locally -Run the tests locally: `make test_e2e_node` +Why run tests *Locally*? Much faster than running tests Remotely. -Running the node e2e tests locally will build the kubernetes go source files and -then start the kubelet, kube-apiserver, and etcd binaries on localhost before -executing the ginkgo tests under test/e2e_node against the local kubelet -instance. 
+Prerequisites: +- [Install etcd](https://github.com/coreos/etcd/releases) on your PATH + - Verify etcd is installed correctly by running `which etcd` +- [Install ginkgo](https://github.com/onsi/ginkgo) on your PATH + - Verify ginkgo is installed correctly by running `which ginkgo` -## Running tests against a remote host - -The node e2e tests can be run against one or more remote hosts using one of: -* [e2e-node-jenkins.sh](../../test/e2e_node/jenkins/e2e-node-jenkins.sh) (gce -only) -* [run_e2e.go](../../test/e2e_node/runner/run_e2e.go) (requires passwordless ssh -and remote passwordless sudo access over ssh) -* using [run_e2e.go](../../test/e2e_node/runner/run_e2e.go) to build a tar.gz -and executing on host (requires host access w/ remote sudo) - -### Option 1: Configuring a new remote host from scratch for testing - -The host must contain an environment capable of running a minimal kubernetes cluster -consisting of etcd, the kube-apiserver, and kubelet. The steps required to step a host vary between distributions -(coreos, rhel, ubuntu, etc), but may include: -* install etcd -* install docker -* add user running tests to docker group -* install lxc and update grub commandline -* enable tty-less sudo access - -These steps should be captured in [setup_host.sh](../../test/e2e_node/environment/setup_host.sh) - -### Option 2: Copying an existing host image from another project - -If there is an existing image in another project you would like to use, you can use the script -[copy-e2e-image.sh](../../test/e2e_node/jenkins/copy-e2e-image.sh) to copy an image -from one GCE project to another. 
+From the Kubernetes base directory, run: ```sh -copy-e2e-image.sh +make test_e2e_node ``` -### Running the tests +This will: run the *ginkgo* binary against the subdirectory *test/e2e_node*, which will in turn: +- Ask for sudo access (needed for running some of the processes) +- Build the Kubernetes source code +- Pre-pull docker images used by the tests +- Start a local instance of *etcd* +- Start a local instance of *kube-apiserver* +- Start a local instance of *kubelet* +- Run the test using the locally started processes +- Output the test results to STDOUT +- Stop *kubelet*, *kube-apiserver*, and *etcd* -1. If running tests against a running host on gce +## Remotely - * Make sure host names are resolvable to ssh by running `gcloud compute config-ssh` to - update ~/.ssh/config with the GCE hosts. After running this command, check the hostnames - in the ~/.ssh/config file and verify you have the correct access by running `ssh `. +Why Run tests *Remotely*? Tests will be run in a customized pristine environment. Closely mimics what will be done +as pre- and post- submit testing performed by the project. - * Copy [template.properties](../../test/e2e_node/jenkins/template.properties) +Prerequisites: +- [join the googlegroup](https://groups.google.com/forum/#!forum/kubernetes-dev) +`kubernetes-dev@googlegroups.com` + - *This provides read access to the node test images.* +- Setup a [Google Cloud Platform](https://cloud.google.com/) account and project with Google Compute Engine enabled +- Install and setup the [gcloud sdk](https://cloud.google.com/sdk/downloads) + - Verify the sdk is setup correctly by running `gcloud compute instances list` and `gcloud compute images list --project kubernetes-node-e2e-images` - * Fill in `GCE_HOSTS` with the name of the host +Run: - * Run `test/e2e_node/jenkins/e2e-node-jenkins.sh ` - * **Must be run from kubernetes root** +```sh +make test_e2e_node REMOTE=true +``` -2. 
If running against a host anywhere else +This will: +- Build the Kubernetes source code +- Create a new GCE instance using the default test image + - Instance will be called **test-e2e-node-containervm-v20160321-image** +- Lookup the instance public ip address +- Copy a compressed archive file to the host containing the following binaries: + - ginkgo + - kubelet + - kube-apiserver + - e2e_node.test (this binary contains the actual tests to be run) +- Unzip the archive to a directory under **/tmp/gcloud** +- Run the tests using the `ginkgo` command + - Starts etcd, kube-apiserver, kubelet + - The ginkgo command is used because this supports more features than running the test binary directly +- Output the remote test results to STDOUT +- `scp` the log files back to the local host under /tmp/_artifacts/e2e-node-containervm-v20160321-image +- Stop the processes on the remote host +- **Leave the GCE instance running** - * **Requires password-less ssh and sudo access** - - * Make sure this works - e.g. `ssh -- sudo echo "ok"` - * If ssh flags are required (e.g. `-i`), they can be used and passed to the -tests with `--ssh-options` - - * `go run test/e2e_node/runner/run_e2e.go --logtostderr --hosts ` - - * **Must be run from kubernetes root** - -3. Alternatively, manually build and copy `e2e_node_test.tar.gz` to a remote -host - - * Build the tar.gz `go run test/e2e_node/runner/run_e2e.go --logtostderr ---build-only` - - * Copy `e2e_node_test.tar.gz` to the remote host - - * Extract the archive on the remote host `tar -xzvf e2e_node_test.tar.gz` - - * Run the tests `./e2e_node.test --logtostderr --vmodule=*=2 ---build-services=false --node-name=` - - * Note: This must be run from the directory containing the kubelet and -kube-apiserver binaries. 
- -## Running tests against a gce image - -* Option 1: Build a gce image from a prepared gce host - * Create the host from a base image and configure it (see above) - * Run tests against this remote host to ensure that it is setup correctly -before doing anything else - * Create a gce *snapshot* of the instance - * Create a gce *disk* from the snapshot - * Create a gce *image* from the disk -* Option 2: Copy a prepared image from another project - * Instructions above -* Test that the necessary gcloud credentials are setup for the project - * `gcloud compute --project --zone images list` - * Verify that your image appears in the list -* Copy [template.properties](../../test/e2e_node/jenkins/template.properties) - * Fill in `GCE_PROJECT`, `GCE_ZONE`, `GCE_IMAGES` -* Run `test/e2e_node/jenkins/e2e-node-jenkins.sh ` - * **Must be run from kubernetes root** - -## Kubernetes Jenkins CI and PR builder - -Node e2e tests are run against a static list of host environments continuously -or when manually triggered on a github.com pull requests using the trigger -phrase `@k8s-bot test node e2e` - -### CI Host environments - -TBD - -### PR builder host environments - -| linux distro | distro version | docker version | etcd version | cloud provider | -|-----------------|----------------|----------------|--------------|----------------| -| containervm | | 1.8 | | gce | -| coreos | stable | 1.8 | | gce | -| debian | jessie | 1.10 | | gce | -| ubuntu | trusty | 1.8 | | gce | -| ubuntu | trusty | 1.9 | | gce | -| ubuntu | trusty | 1.10 | | gce | +**Note: Subsequent tests run using the same image will *reuse the existing host* instead of deleting it and +provisioning a new one. 
To delete the GCE instance after each test see +*[DELETE_INSTANCES](#delete-instance-after-tests-run)*.** +# Additional Remote Options +## Run tests using different images +This is useful if you want to run tests against a host using a different OS distro or container runtime than +provided by the default image. +List the available test images using gcloud. +```sh +make test_e2e_node LIST_IMAGES=true +``` + +This will output a list of the available images for the default image project. + +Then run: + +```sh +make test_e2e_node REMOTE=true IMAGES="" +``` + +## Run tests against a running GCE instance (not an image) + +This is useful if you have a host instance running already and want to run the tests there instead of on a new instance. + +```sh +make test_e2e_node REMOTE=true HOSTS="" +``` + +## Delete instance after tests run + +This is useful if you want to recreate the instance for each test run to trigger flakes related to starting the instance. + +```sh +make test_e2e_node REMOTE=true DELETE_INSTANCES=true +``` + +## Keep instance, test binaries, and *processes* around after tests run + +This is useful if you want to manually inspect or debug the kubelet process run as part of the tests. + +```sh +make test_e2e_node REMOTE=true CLEANUP=false +``` + +## Run tests using an image in another project + +This is useful if you want to create your own host image in another project and use it for testing. + +```sh +make test_e2e_node REMOTE=true IMAGE_PROJECT="" IMAGES="" +``` + +Setting up your own host image may require additional steps such as installing etcd or docker. See +[setup_host.sh](../../test/e2e_node/environment/setup_host.sh) for common steps to set up hosts to run node tests. + +## Create instances using a different instance name prefix + +This is useful if you want to create instances using a different name so that you can run multiple copies of the +test in parallel against different instances of the same image. 
+ +```sh +make test_e2e_node REMOTE=true INSTANCE_PREFIX="my-prefix" +``` + +# Additional Test Options for both Remote and Local execution + +## Only run a subset of the tests + +To run tests matching a regex: + +```sh +make test_e2e_node REMOTE=true FOCUS="" +``` + +To run tests NOT matching a regex: + +```sh +make test_e2e_node REMOTE=true SKIP="" +``` + +## Run tests continually until they fail + +This is useful if you are trying to debug a flaky test failure. This will cause ginkgo to continually +run the tests until they fail. **Note: this will only perform test setup once (e.g. creating the instance) and is +less useful for catching flakes related to creating the instance from an image.** + +```sh +make test_e2e_node REMOTE=true RUN_UNTIL_FAILURE=true +``` + +# Notes on tests run by the Kubernetes project during pre-, post- submit. + +The node e2e tests are run by the PR builder for each Pull Request and the results published at +the bottom of the comments section. To re-run just the node e2e tests from the PR builder add the comment +`@k8s-bot node e2e test this issue: #` and **include a link to the test +failure logs if caused by a flake.** + +The PR builder runs tests against the images listed in [jenkins-pull.properties](../../test/e2e_node/jenkins/jenkins-pull.properties) + +The post submit tests run against the images listed in [jenkins-ci.properties](../../test/e2e_node/jenkins/jenkins-ci.properties) diff --git a/hack/e2e-node-test.sh b/hack/e2e-node-test.sh index 0cd4169a5d..7954744ec1 100755 --- a/hack/e2e-node-test.sh +++ b/hack/e2e-node-test.sh @@ -20,6 +20,24 @@ source "${KUBE_ROOT}/hack/lib/init.sh" focus=${FOCUS:-""} skip=${SKIP:-""} report=${REPORT:-"/tmp/"} +artifacts=${ARTIFACTS:-"/tmp/_artifacts"} +remote=${REMOTE:-"false"} +images=${IMAGES:-""} +hosts=${HOSTS:-""} +if [[ $hosts == "" && $images == "" ]]; then + images="e2e-node-containervm-v20160321-image" +fi +image_project=${IMAGE_PROJECT:-"kubernetes-node-e2e-images"} 
+instance_prefix=${INSTANCE_PREFIX:-"test"} +cleanup=${CLEANUP:-"true"} +delete_instances=${DELETE_INSTANCES:-"false"} +run_until_failure=${RUN_UNTIL_FAILURE:-"false"} +list_images=${LIST_IMAGES:-"false"} + +if [[ $list_images == "true" ]]; then + gcloud compute images list --project="${image_project}" | grep "e2e-node" + exit 0 +fi ginkgo=$(kube::util::find-binary "ginkgo") if [[ -z "${ginkgo}" ]]; then @@ -27,12 +45,90 @@ if [[ -z "${ginkgo}" ]]; then exit 1 fi -# Refresh sudo credentials if not running on GCE. -if ! ping -c 1 -q metadata.google.internal &> /dev/null; then - sudo -v || exit 1 +if [ $remote = true ] ; then + # Setup the directory to copy test artifacts (logs, junit.xml, etc) from remote host to local host + if [ ! -d "${artifacts}" ]; then + echo "Creating artifacts directory at ${artifacts}" + mkdir -p ${artifacts} + fi + echo "Test artifacts will be written to ${artifacts}" + + # Get the compute zone + zone=$(gcloud info --format='value(config.properties.compute.zone)') + if [[ $zone == "" ]]; then + echo "Could not find gcloud compute/zone when running:\ngcloud info --format='value(config.properties.compute.zone)'" + exit 1 + fi + + # Get the compute project + project=$(gcloud info --format='value(config.project)') + if [[ $project == "" ]]; then + echo "Could not find gcloud project when running:\ngcloud info --format='value(config.project)'" + exit 1 + fi + + # Check if any of the images specified already have running instances. 
If so reuse those instances + # by moving the IMAGE to a HOST + if [[ $images != "" ]]; then + IFS=',' read -ra IM <<< "$images" + images="" + for i in "${IM[@]}"; do + if [[ $(gcloud compute instances list "${instance_prefix}-$i" | grep $i) ]]; then + if [[ $hosts != "" ]]; then + hosts="$hosts," + fi + echo "Reusing host ${instance_prefix}-$i" + hosts="${hosts}${instance_prefix}-${i}" + else + if [[ $images != "" ]]; then + images="$images," + fi + images="$images$i" + fi + done + fi + + # Parse the flags to pass to ginkgo + ginkgoflags="" + if [[ $focus != "" ]]; then + ginkgoflags="$ginkgoflags -focus=$focus " + fi + + if [[ $skip != "" ]]; then + ginkgoflags="$ginkgoflags -skip=$skip " + fi + + if [[ $run_until_failure != "" ]]; then + ginkgoflags="$ginkgoflags -untilItFails=$run_until_failure " + fi + + # Output the configuration we will try to run + echo "Running tests remotely using" + echo "Project: $project" + echo "Image Project: $image_project" + echo "Compute/Zone: $zone" + echo "Images: $images" + echo "Hosts: $hosts" + echo "Ginkgo Flags: $ginkgoflags" + + # Invoke the runner + go run test/e2e_node/runner/run_e2e.go --logtostderr --vmodule=*=2 --ssh-env="gce" \ + --zone="$zone" --project="$project" \ + --hosts="$hosts" --images="$images" --cleanup="$cleanup" \ + --results-dir="$artifacts" --ginkgo-flags="$ginkgoflags" \ + --image-project="$image_project" --instance-name-prefix="$instance_prefix" --setup-node="true" \ + --delete-instances="$delete_instances" + exit $? + +else + # Refresh sudo credentials if not running on GCE. + if ! ping -c 1 -q metadata.google.internal &> /dev/null; then + sudo -v || exit 1 + fi + + # Test using the host the script was run on + # Provided for backwards compatibility + "${ginkgo}" --focus=$focus --skip=$skip "${KUBE_ROOT}/test/e2e_node/" --report-dir=${report} \ + -- --alsologtostderr --v 2 --node-name $(hostname) --build-services=true --start-services=true --stop-services=true + exit $? 
fi - -# Provided for backwards compatibility -"${ginkgo}" --focus=$focus --skip=$skip "${KUBE_ROOT}/test/e2e_node/" --report-dir=${report} -- --alsologtostderr --v 2 --node-name $(hostname) --build-services=true --start-services=true --stop-services=true - -exit $? diff --git a/hack/verify-flags/known-flags.txt b/hack/verify-flags/known-flags.txt index 53e8d10313..9513c80f08 100644 --- a/hack/verify-flags/known-flags.txt +++ b/hack/verify-flags/known-flags.txt @@ -88,6 +88,7 @@ default-container-cpu-limit default-container-mem-limit delay-shutdown delete-collection-workers +delete-instances delete-namespace deleting-pods-burst deleting-pods-qps diff --git a/test/e2e_node/e2e_build.go b/test/e2e_node/e2e_build.go index 9c03a2cd3c..5cb2d3d126 100644 --- a/test/e2e_node/e2e_build.go +++ b/test/e2e_node/e2e_build.go @@ -120,7 +120,7 @@ func getK8sNodeTestDir() (string, error) { func getKubeletServerBin() string { bin, err := getK8sBin("kubelet") if err != nil { - panic(fmt.Sprintf("Could not locate kubelet binary.")) + glog.Fatalf("Could not locate kubelet binary %v.", err) } return bin } @@ -128,7 +128,7 @@ func getKubeletServerBin() string { func getApiServerBin() string { bin, err := getK8sBin("kube-apiserver") if err != nil { - panic(fmt.Sprintf("Could not locate kube-apiserver binary.")) + glog.Fatalf("Could not locate kube-apiserver binary %v.", err) } return bin } diff --git a/test/e2e_node/e2e_remote.go b/test/e2e_node/e2e_remote.go index 670247376b..a9ae1de2a6 100644 --- a/test/e2e_node/e2e_remote.go +++ b/test/e2e_node/e2e_remote.go @@ -26,6 +26,7 @@ import ( "os/user" "path/filepath" "strings" + "sync" "github.com/golang/glog" utilerrors "k8s.io/kubernetes/pkg/util/errors" @@ -41,6 +42,11 @@ var sshOptionsMap map[string]string const archiveName = "e2e_node_test.tar.gz" +var hostnameIpOverrides = struct { + sync.RWMutex + m map[string]string +}{m: make(map[string]string)} + func init() { usr, err := user.Current() if err != nil { @@ -51,9 +57,24 @@ func 
init() { } } +func AddHostnameIp(hostname, ip string) { + hostnameIpOverrides.Lock() + defer hostnameIpOverrides.Unlock() + hostnameIpOverrides.m[hostname] = ip +} + +func GetHostnameOrIp(hostname string) string { + hostnameIpOverrides.RLock() + defer hostnameIpOverrides.RUnlock() + if ip, found := hostnameIpOverrides.m[hostname]; found { + return ip + } + return hostname +} + // CreateTestArchive builds the local source and creates a tar archive e2e_node_test.tar.gz containing // the binaries k8s required for node e2e tests -func CreateTestArchive() string { +func CreateTestArchive() (string, error) { // Build the executables buildGo() @@ -65,50 +86,57 @@ func CreateTestArchive() string { ginkgoTest := filepath.Join(buildOutputDir, "e2e_node.test") if _, err := os.Stat(ginkgoTest); err != nil { - glog.Fatalf("Failed to locate test binary %s", ginkgoTest) + return "", fmt.Errorf("failed to locate test binary %s", ginkgoTest) } - kubelet := filepath.Join(buildOutputDir, "kubelet") if _, err := os.Stat(kubelet); err != nil { - glog.Fatalf("Failed to locate binary %s", kubelet) + return "", fmt.Errorf("failed to locate binary %s", kubelet) } apiserver := filepath.Join(buildOutputDir, "kube-apiserver") if _, err := os.Stat(apiserver); err != nil { - glog.Fatalf("Failed to locate binary %s", apiserver) + return "", fmt.Errorf("failed to locate binary %s", apiserver) + } + ginkgo := filepath.Join(buildOutputDir, "ginkgo") + if _, err := os.Stat(apiserver); err != nil { + return "", fmt.Errorf("failed to locate binary %s", ginkgo) } glog.Infof("Building archive...") tardir, err := ioutil.TempDir("", "node-e2e-archive") if err != nil { - glog.Fatalf("Failed to create temporary directory %v.", err) + return "", fmt.Errorf("failed to create temporary directory %v.", err) } defer os.RemoveAll(tardir) // Copy binaries out, err := exec.Command("cp", ginkgoTest, filepath.Join(tardir, "e2e_node.test")).CombinedOutput() if err != nil { - glog.Fatalf("Failed to copy e2e_node.test 
%v.", err) + return "", fmt.Errorf("failed to copy e2e_node.test %v.", err) } out, err = exec.Command("cp", kubelet, filepath.Join(tardir, "kubelet")).CombinedOutput() if err != nil { - glog.Fatalf("Failed to copy kubelet %v.", err) + return "", fmt.Errorf("failed to copy kubelet %v.", err) } out, err = exec.Command("cp", apiserver, filepath.Join(tardir, "kube-apiserver")).CombinedOutput() if err != nil { - glog.Fatalf("Failed to copy kube-apiserver %v.", err) + return "", fmt.Errorf("failed to copy kube-apiserver %v.", err) + } + out, err = exec.Command("cp", ginkgo, filepath.Join(tardir, "ginkgo")).CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to copy ginkgo %v.", err) } // Build the tar out, err = exec.Command("tar", "-zcvf", archiveName, "-C", tardir, ".").CombinedOutput() if err != nil { - glog.Fatalf("Failed to build tar %v. Output:\n%s", err, out) + return "", fmt.Errorf("failed to build tar %v. Output:\n%s", err, out) } dir, err := os.Getwd() if err != nil { - glog.Fatalf("Failed to get working directory %v.", err) + return "", fmt.Errorf("failed to get working directory %v.", err) } - return filepath.Join(dir, archiveName) + return filepath.Join(dir, archiveName), nil } // Returns the command output, whether the exit was ok, and any errors @@ -118,31 +146,31 @@ func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, s if err != nil { return "", false, fmt.Errorf("could not find username: %v", err) } - output, err := RunSshCommand("ssh", host, "--", "sudo", "usermod", "-a", "-G", "docker", uname.Username) + output, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sudo", "usermod", "-a", "-G", "docker", uname.Username) if err != nil { - return "", false, fmt.Errorf("Instance %s not running docker daemon - Command failed: %s", host, output) + return "", false, fmt.Errorf("instance %s not running docker daemon - Command failed: %s", host, output) } } // Create the temp staging directory glog.Infof("Staging 
test binaries on %s", host) tmp := fmt.Sprintf("/tmp/gcloud-e2e-%d", rand.Int31()) - _, err := RunSshCommand("ssh", host, "--", "mkdir", tmp) + _, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "mkdir", tmp) if err != nil { // Exit failure with the error return "", false, err } if cleanup { defer func() { - output, err := RunSshCommand("ssh", host, "--", "rm", "-rf", tmp) + output, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "rm", "-rf", tmp) if err != nil { - glog.Errorf("Failed to cleanup tmp directory %s on host %v. Output:\n%s", tmp, err, output) + glog.Errorf("failed to cleanup tmp directory %s on host %v. Output:\n%s", tmp, err, output) } }() } // Copy the archive to the staging directory - _, err = RunSshCommand("scp", archive, fmt.Sprintf("%s:%s/", host, tmp)) + _, err = RunSshCommand("scp", archive, fmt.Sprintf("%s:%s/", GetHostnameOrIp(host), tmp)) if err != nil { // Exit failure with the error return "", false, err @@ -158,12 +186,12 @@ func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, s // If we are unable to stop existing running k8s processes, we should see messages in the kubelet/apiserver/etcd // logs about failing to bind the required ports. 
glog.Infof("Killing any existing node processes on %s", host) - RunSshCommand("ssh", host, "--", "sh", "-c", cmd) + RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd) // Extract the archive cmd = getSshCommand(" && ", fmt.Sprintf("cd %s", tmp), fmt.Sprintf("tar -xzvf ./%s", archiveName)) glog.Infof("Extracting tar on %s", host) - output, err := RunSshCommand("ssh", host, "--", "sh", "-c", cmd) + output, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd) if err != nil { // Exit failure with the error return "", false, err @@ -172,12 +200,13 @@ func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, s // Run the tests cmd = getSshCommand(" && ", fmt.Sprintf("cd %s", tmp), - fmt.Sprintf("timeout -k 30s %ds ./e2e_node.test --logtostderr --v 2 --build-services=false --stop-services=%t --node-name=%s --report-dir=%s/results --junit-file-number=%d %s", *testTimeoutSeconds, cleanup, host, tmp, junitFileNumber, *ginkgoFlags), + fmt.Sprintf("timeout -k 30s %ds ./ginkgo %s ./e2e_node.test -- --logtostderr --v 2 --build-services=false --stop-services=%t --node-name=%s --report-dir=%s/results --junit-file-number=%d", *testTimeoutSeconds, *ginkgoFlags, cleanup, host, tmp, junitFileNumber), ) aggErrs := []error{} glog.Infof("Starting tests on %s", host) - output, err = RunSshCommand("ssh", host, "--", "sh", "-c", cmd) + output, err = RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd) + if err != nil { aggErrs = append(aggErrs, err) } @@ -195,13 +224,13 @@ func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, s } func getTestArtifacts(host, testDir string) error { - _, err := RunSshCommand("scp", "-r", fmt.Sprintf("%s:%s/results/", host, testDir), fmt.Sprintf("%s/%s", *resultsDir, host)) + _, err := RunSshCommand("scp", "-r", fmt.Sprintf("%s:%s/results/", GetHostnameOrIp(host), testDir), fmt.Sprintf("%s/%s", *resultsDir, host)) if err != nil { return err } // Copy junit to the 
top of artifacts - _, err = RunSshCommand("scp", fmt.Sprintf("%s:%s/results/junit*", host, testDir), fmt.Sprintf("%s/", *resultsDir)) + _, err = RunSshCommand("scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIp(host), testDir), fmt.Sprintf("%s/", *resultsDir)) if err != nil { return err } @@ -223,7 +252,7 @@ func RunSshCommand(cmd string, args ...string) (string, error) { } output, err := exec.Command(cmd, args...).CombinedOutput() if err != nil { - return fmt.Sprintf("%s", output), fmt.Errorf("Command [%s %s] failed with error: %v and output:\n%s", cmd, strings.Join(args, " "), err, output) + return fmt.Sprintf("%s", output), fmt.Errorf("command [%s %s] failed with error: %v and output:\n%s", cmd, strings.Join(args, " "), err, output) } return fmt.Sprintf("%s", output), nil } diff --git a/test/e2e_node/jenkins/e2e-node-jenkins.sh b/test/e2e_node/jenkins/e2e-node-jenkins.sh index c774fa172c..c5e5e8f24f 100755 --- a/test/e2e_node/jenkins/e2e-node-jenkins.sh +++ b/test/e2e_node/jenkins/e2e-node-jenkins.sh @@ -29,7 +29,10 @@ set -x . 
$1 go build test/e2e_node/environment/conformance.go + +WORKSPACE=${WORKSPACE:-"/tmp/"} ARTIFACTS=${WORKSPACE}/_artifacts + mkdir -p ${ARTIFACTS} go run test/e2e_node/runner/run_e2e.go --logtostderr --vmodule=*=2 --ssh-env="gce" \ --zone="$GCE_ZONE" --project="$GCE_PROJECT" --image-project="$GCE_IMAGE_PROJECT" \ diff --git a/test/e2e_node/jenkins/jenkins-ci.properties b/test/e2e_node/jenkins/jenkins-ci.properties index 42c78abd4c..990b7f80e1 100644 --- a/test/e2e_node/jenkins/jenkins-ci.properties +++ b/test/e2e_node/jenkins/jenkins-ci.properties @@ -11,6 +11,6 @@ GCE_ZONE=us-central1-f GCE_PROJECT=kubernetes-jenkins GCE_IMAGE_PROJECT=kubernetes-jenkins CLEANUP=true -GINKGO_FLAGS=--ginkgo.skip=FLAKY +GINKGO_FLAGS=--skip=FLAKY SETUP_NODE=false diff --git a/test/e2e_node/jenkins/jenkins-pull.properties b/test/e2e_node/jenkins/jenkins-pull.properties index 62e5688f8a..db2e1331c1 100644 --- a/test/e2e_node/jenkins/jenkins-pull.properties +++ b/test/e2e_node/jenkins/jenkins-pull.properties @@ -11,5 +11,5 @@ GCE_ZONE=us-central1-f GCE_PROJECT=kubernetes-jenkins-pull GCE_IMAGE_PROJECT=kubernetes-jenkins-pull CLEANUP=true -GINKGO_FLAGS=--ginkgo.skip=FLAKY +GINKGO_FLAGS=--skip=FLAKY SETUP_NODE=false \ No newline at end of file diff --git a/test/e2e_node/runner/run_e2e.go b/test/e2e_node/runner/run_e2e.go index 1019073fa0..ac0ee95af8 100644 --- a/test/e2e_node/runner/run_e2e.go +++ b/test/e2e_node/runner/run_e2e.go @@ -27,6 +27,7 @@ import ( "net/http" "os" "strings" + "sync" "time" "k8s.io/kubernetes/test/e2e_node" @@ -45,11 +46,20 @@ var imageProject = flag.String("image-project", "", "gce project the hosts live var images = flag.String("images", "", "images to test") var hosts = flag.String("hosts", "", "hosts to test") var cleanup = flag.Bool("cleanup", true, "If true remove files from remote hosts and delete temporary instances") +var deleteInstances = flag.Bool("delete-instances", true, "If true, delete any instances created") var buildOnly = flag.Bool("build-only", 
false, "If true, build e2e_node_test.tar.gz and exit.") var setupNode = flag.Bool("setup-node", false, "When true, current user will be added to docker group on the test machine") var computeService *compute.Service +type Archive struct { + sync.Once + path string + err error +} + +var arc Archive + type TestResult struct { output string err error @@ -94,35 +104,22 @@ func main() { noColour = "\033[0m" } - archive := e2e_node.CreateTestArchive() - defer os.Remove(archive) + go arc.getArchive() + defer arc.deleteArchive() + + var err error + computeService, err = getComputeClient() + if err != nil { + glog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. %v", err) + } results := make(chan *TestResult) running := 0 if *images != "" { - // Setup the gce client for provisioning instances - // Getting credentials on gce jenkins is flaky, so try a couple times - var err error - for i := 0; i < 10; i++ { - var client *http.Client - client, err = google.DefaultClient(oauth2.NoContext, compute.ComputeScope) - if err != nil { - continue - } - computeService, err = compute.New(client) - if err != nil { - continue - } - time.Sleep(time.Second * 6) - } - if err != nil { - glog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. 
%v", err) - } - for _, image := range strings.Split(*images, ",") { running++ fmt.Printf("Initializing e2e tests using image %s.\n", image) - go func(image string, junitFileNum int) { results <- testImage(image, archive, junitFileNum) }(image, running) + go func(image string, junitFileNum int) { results <- testImage(image, junitFileNum) }(image, running) } } if *hosts != "" { @@ -130,7 +127,7 @@ func main() { fmt.Printf("Initializing e2e tests using host %s.\n", host) running++ go func(host string, junitFileNum int) { - results <- testHost(host, archive, *cleanup, junitFileNum, *setupNode) + results <- testHost(host, *cleanup, junitFileNum, *setupNode) }(host, running) } } @@ -159,9 +156,51 @@ func main() { } } +func (a *Archive) getArchive() (string, error) { + a.Do(func() { a.path, a.err = e2e_node.CreateTestArchive() }) + return a.path, a.err +} + +func (a *Archive) deleteArchive() { + path, err := a.getArchive() + if err != nil { + return + } + os.Remove(path) +} + // Run tests in archive against host -func testHost(host, archive string, deleteFiles bool, junitFileNum int, setupNode bool) *TestResult { - output, exitOk, err := e2e_node.RunRemote(archive, host, deleteFiles, junitFileNum, setupNode) +func testHost(host string, deleteFiles bool, junitFileNum int, setupNode bool) *TestResult { + instance, err := computeService.Instances.Get(*project, *zone, host).Do() + if err != nil { + return &TestResult{ + err: err, + host: host, + exitOk: false, + } + } + if strings.ToUpper(instance.Status) != "RUNNING" { + err = fmt.Errorf("instance %s not in state RUNNING, was %s.", host, instance.Status) + return &TestResult{ + err: err, + host: host, + exitOk: false, + } + } + externalIp := getExternalIp(instance) + if len(externalIp) > 0 { + e2e_node.AddHostnameIp(host, externalIp) + } + + path, err := arc.getArchive() + if err != nil { + // Don't log fatal because we need to do any needed cleanup contained in "defer" statements + return &TestResult{ + err: 
fmt.Errorf("unable to create test archive %v.", err), + } + } + + output, exitOk, err := e2e_node.RunRemote(path, host, deleteFiles, junitFileNum, setupNode) return &TestResult{ output: output, err: err, @@ -172,17 +211,21 @@ func testHost(host, archive string, deleteFiles bool, junitFileNum int, setupNod // Provision a gce instance using image and run the tests in archive against the instance. // Delete the instance afterward. -func testImage(image, archive string, junitFileNum int) *TestResult { +func testImage(image string, junitFileNum int) *TestResult { host, err := createInstance(image) - if *cleanup { + if *deleteInstances { defer deleteInstance(image) } if err != nil { return &TestResult{ - err: fmt.Errorf("Unable to create gce instance with running docker daemon for image %s. %v", image, err), + err: fmt.Errorf("unable to create gce instance with running docker daemon for image %s. %v", image, err), } } - return testHost(host, archive, false, junitFileNum, *setupNode) + + // Only delete the files if we are keeping the instance and want it cleaned up. 
+ // If we are going to delete the instance, don't bother with cleaning up the files + deleteFiles := !*deleteInstances && *cleanup + return testHost(host, deleteFiles, junitFileNum, *setupNode) } // Provision a gce instance using image @@ -216,7 +259,7 @@ func createInstance(image string) (string, error) { return "", err } if op.Error != nil { - return "", fmt.Errorf("Could not create instance %s: %+v", name, op.Error) + return "", fmt.Errorf("could not create instance %s: %+v", name, op.Error) } instanceRunning := false @@ -230,17 +273,21 @@ func createInstance(image string) (string, error) { continue } if strings.ToUpper(instance.Status) != "RUNNING" { - err = fmt.Errorf("Instance %s not in state RUNNING, was %s.", name, instance.Status) + err = fmt.Errorf("instance %s not in state RUNNING, was %s.", name, instance.Status) continue } + externalIp := getExternalIp(instance) + if len(externalIp) > 0 { + e2e_node.AddHostnameIp(name, externalIp) + } var output string - output, err = e2e_node.RunSshCommand("ssh", name, "--", "sudo", "docker", "version") + output, err = e2e_node.RunSshCommand("ssh", e2e_node.GetHostnameOrIp(name), "--", "sudo", "docker", "version") if err != nil { - err = fmt.Errorf("Instance %s not running docker daemon - Command failed: %s", name, output) + err = fmt.Errorf("instance %s not running docker daemon - Command failed: %s", name, output) continue } if !strings.Contains(output, "Server") { - err = fmt.Errorf("Instance %s not running docker daemon - Server not found: %s", name, output) + err = fmt.Errorf("instance %s not running docker daemon - Server not found: %s", name, output) continue } instanceRunning = true @@ -248,6 +295,47 @@ func createInstance(image string) (string, error) { return name, err } +func getExternalIp(instance *compute.Instance) string { + for i := range instance.NetworkInterfaces { + ni := instance.NetworkInterfaces[i] + for j := range ni.AccessConfigs { + ac := ni.AccessConfigs[j] + if len(ac.NatIP) > 0 { + return 
ac.NatIP + } + } + } + return "" +} + +func getComputeClient() (*compute.Service, error) { + const retries = 10 + const backoff = time.Second * 6 + + // Setup the gce client for provisioning instances + // Getting credentials on gce jenkins is flaky, so try a couple times + var err error + var cs *compute.Service + for i := 0; i < retries; i++ { + if i > 0 { + time.Sleep(backoff) + } + + var client *http.Client + client, err = google.DefaultClient(oauth2.NoContext, compute.ComputeScope) + if err != nil { + continue + } + + cs, err = compute.New(client) + if err != nil { + continue + } + return cs, nil + } + return nil, err +} + func deleteInstance(image string) { _, err := computeService.Instances.Delete(*project, *zone, imageToInstanceName(image)).Do() if err != nil {