Node e2e Makefile support for running remote tests against kubernetes-node-e2e-images.

Also includes other improvements:
- Makefile rule to run tests against a remote instance using an existing host or image
- Makefile reuses an instance created from an image if it was not torn down
- Runner starts GCE instances in parallel with building the source
- Runner uses the instance IP instead of the hostname, so hostnames do not need to resolve
- Runner supports cleaning up files and processes on an instance without stopping / deleting it
- Runner runs tests using the `ginkgo` binary to support running tests in parallel
Phillip Wittrock 2016-06-03 17:50:21 -07:00
parent 0d3be6a316
commit e94e1c6e3d
10 changed files with 461 additions and 192 deletions

View File

@@ -93,16 +93,28 @@ test_e2e:
 # Build and run node end-to-end tests.
 #
 # Args:
-#  FOCUS: regexp that matches the tests to be run
-#  SKIP: regexp that matches the tests that needs to be skipped
+#  FOCUS: regexp that matches the tests to be run. Defaults to "".
+#  SKIP: regexp that matches the tests that need to be skipped. Defaults to "".
+#  RUN_UNTIL_FAILURE: If true, pass --untilItFails to ginkgo so tests are run repeatedly until they fail. Defaults to false.
+#  REMOTE: If true, run the tests on a remote host instance on GCE. Defaults to false.
+#  IMAGES: For REMOTE=true only. Comma-delimited list of images for creating remote hosts to run tests against. Defaults to "e2e-node-containervm-v20160321-image".
+#  LIST_IMAGES: If true, don't run tests. Just output the list of available images for testing. Defaults to false.
+#  HOSTS: For REMOTE=true only. Comma-delimited list of running GCE hosts to run tests against. Defaults to "".
+#  DELETE_INSTANCES: For REMOTE=true only. Delete any instances created as part of this test run. Defaults to false.
+#  ARTIFACTS: For REMOTE=true only. Local directory to scp test artifacts into from the remote hosts. Defaults to "/tmp/_artifacts".
+#  REPORT: For REMOTE=false only. Local directory to write junit xml results to. Defaults to "/tmp/".
+#  CLEANUP: For REMOTE=true only. If false, do not stop processes or delete test files on remote hosts. Defaults to true.
+#  IMAGE_PROJECT: For REMOTE=true only. Project containing the images provided to IMAGES. Defaults to "kubernetes-node-e2e-images".
+#  INSTANCE_PREFIX: For REMOTE=true only. Instances created from images will be named "${INSTANCE_PREFIX}-${IMAGE_NAME}". Defaults to "test".
+#
 # Example:
 #   make test_e2e_node FOCUS=kubelet SKIP=container
+#   make test_e2e_node REMOTE=true DELETE_INSTANCES=true
 # Build and run tests.
 test_e2e_node:
-	hack/e2e-node-test.sh FOCUS=$(FOCUS) SKIP=$(SKIP)
+	hack/e2e-node-test.sh
 .PHONY: test_e2e_node

 # Remove all build artifacts.
 #
 # Example:

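For illustration, the `test_e2e_node` arguments documented above combine freely on the make command line; a couple of invocations built only from the flags listed in that target (values shown are the documented defaults or placeholders):

```sh
# List the images available for remote testing, without running any tests:
make test_e2e_node LIST_IMAGES=true

# Run only kubelet-focused tests on a fresh GCE instance and delete it afterward:
make test_e2e_node REMOTE=true FOCUS=kubelet DELETE_INSTANCES=true
```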
View File

@@ -34,147 +34,187 @@ Documentation for other releases can be found at

 # Node End-To-End tests

-Node e2e tests start kubelet and minimal supporting infrastructure to validate
-the kubelet on a host. Tests can be run either locally, against a remote host or
-against a GCE image.
+Node e2e tests are component tests meant for testing the Kubelet code on a custom host environment.
+
+Tests can be run either locally or against a host running on GCE.
+
+Node e2e tests are run as both pre- and post- submit tests by the Kubernetes project.

 *Note: Linux only. Mac and Windows unsupported.*

-## Running tests locally
-
-etcd must be installed and on the PATH to run the node e2e tests. To verify
-etcd is installed: `which etcd`. You can find instructions for installing etcd
-[on the etcd releases page](https://github.com/coreos/etcd/releases).
-
-Run the tests locally: `make test_e2e_node`
-
-Running the node e2e tests locally will build the kubernetes go source files and
-then start the kubelet, kube-apiserver, and etcd binaries on localhost before
-executing the ginkgo tests under test/e2e_node against the local kubelet
-instance.
-
-## Running tests against a remote host
-
-The node e2e tests can be run against one or more remote hosts using one of:
-* [e2e-node-jenkins.sh](../../test/e2e_node/jenkins/e2e-node-jenkins.sh) (gce only)
-* [run_e2e.go](../../test/e2e_node/runner/run_e2e.go) (requires passwordless ssh and remote passwordless sudo access over ssh)
-* using [run_e2e.go](../../test/e2e_node/runner/run_e2e.go) to build a tar.gz and executing on host (requires host access w/ remote sudo)
-
-### Option 1: Configuring a new remote host from scratch for testing
-
-The host must contain an environment capable of running a minimal kubernetes cluster
-consisting of etcd, the kube-apiserver, and kubelet. The steps required to setup a host vary between distributions
-(coreos, rhel, ubuntu, etc), but may include:
-* install etcd
-* install docker
-* add user running tests to docker group
-* install lxc and update grub commandline
-* enable tty-less sudo access
-
-These steps should be captured in [setup_host.sh](../../test/e2e_node/environment/setup_host.sh)
-
-### Option 2: Copying an existing host image from another project
-
-If there is an existing image in another project you would like to use, you can use the script
-[copy-e2e-image.sh](../../test/e2e_node/jenkins/copy-e2e-image.sh) to copy an image
-from one GCE project to another.
-
-```sh
-copy-e2e-image.sh <image-to-be-copied-name> <from-gce-project> <to-gce-project>
-```
-
-### Running the tests
-
-1. If running tests against a running host on gce
-  * Make sure host names are resolvable to ssh by running `gcloud compute config-ssh` to
-    update ~/.ssh/config with the GCE hosts. After running this command, check the hostnames
-    in the ~/.ssh/config file and verify you have the correct access by running `ssh <host>`.
-  * Copy [template.properties](../../test/e2e_node/jenkins/template.properties)
-  * Fill in `GCE_HOSTS` with the name of the host
-  * Run `test/e2e_node/jenkins/e2e-node-jenkins.sh <path to properties file>`
-    * **Must be run from kubernetes root**
-2. If running against a host anywhere else
-  * **Requires password-less ssh and sudo access**
-    * Make sure this works - e.g. `ssh <hostname> -- sudo echo "ok"`
-    * If ssh flags are required (e.g. `-i`), they can be used and passed to the tests with `--ssh-options`
-  * `go run test/e2e_node/runner/run_e2e.go --logtostderr --hosts <comma separated hosts>`
-    * **Must be run from kubernetes root**
-3. Alternatively, manually build and copy `e2e_node_test.tar.gz` to a remote host
-  * Build the tar.gz `go run test/e2e_node/runner/run_e2e.go --logtostderr --build-only`
-  * Copy `e2e_node_test.tar.gz` to the remote host
-  * Extract the archive on the remote host `tar -xzvf e2e_node_test.tar.gz`
-  * Run the tests `./e2e_node.test --logtostderr --vmodule=*=2 --build-services=false --node-name=<hostname>`
-    * Note: This must be run from the directory containing the kubelet and kube-apiserver binaries.
-
-## Running tests against a gce image
-
-* Option 1: Build a gce image from a prepared gce host
-  * Create the host from a base image and configure it (see above)
-  * Run tests against this remote host to ensure that it is setup correctly before doing anything else
-  * Create a gce *snapshot* of the instance
-  * Create a gce *disk* from the snapshot
-  * Create a gce *image* from the disk
-* Option 2: Copy a prepared image from another project
-  * Instructions above
-* Test that the necessary gcloud credentials are setup for the project
-  * `gcloud compute --project <project> --zone <zone> images list`
-  * Verify that your image appears in the list
-* Copy [template.properties](../../test/e2e_node/jenkins/template.properties)
-  * Fill in `GCE_PROJECT`, `GCE_ZONE`, `GCE_IMAGES`
-* Run `test/e2e_node/jenkins/e2e-node-jenkins.sh <path to properties file>`
-  * **Must be run from kubernetes root**
-
-## Kubernetes Jenkins CI and PR builder
-
-Node e2e tests are run against a static list of host environments continuously
-or when manually triggered on github.com pull requests using the trigger
-phrase `@k8s-bot test node e2e`
-
-### CI Host environments
-
-TBD
-
-### PR builder host environments
-
-| linux distro | distro version | docker version | etcd version | cloud provider |
-|--------------|----------------|----------------|--------------|----------------|
-| containervm  |                | 1.8            |              | gce            |
-| coreos       | stable         | 1.8            |              | gce            |
-| debian       | jessie         | 1.10           |              | gce            |
-| ubuntu       | trusty         | 1.8            |              | gce            |
-| ubuntu       | trusty         | 1.9            |              | gce            |
-| ubuntu       | trusty         | 1.10           |              | gce            |
+# Running tests
+
+## Locally
+
+Why run tests *Locally*? Much faster than running tests Remotely.
+
+Prerequisites:
+- [Install etcd](https://github.com/coreos/etcd/releases) on your PATH
+  - Verify etcd is installed correctly by running `which etcd`
+- [Install ginkgo](https://github.com/onsi/ginkgo) on your PATH
+  - Verify ginkgo is installed correctly by running `which ginkgo`
+
+From the Kubernetes base directory, run:
+
+```sh
+make test_e2e_node
+```
+
+This will run the *ginkgo* binary against the subdirectory *test/e2e_node*, which will in turn:
+- Ask for sudo access (needed for running some of the processes)
+- Build the Kubernetes source code
+- Pre-pull docker images used by the tests
+- Start a local instance of *etcd*
+- Start a local instance of *kube-apiserver*
+- Start a local instance of *kubelet*
+- Run the test using the locally started processes
+- Output the test results to STDOUT
+- Stop *kubelet*, *kube-apiserver*, and *etcd*
+
+## Remotely
+
+Why run tests *Remotely*? Tests will be run in a customized pristine environment that closely mimics
+the pre- and post- submit testing performed by the project.
+
+Prerequisites:
+- [Join the googlegroup](https://groups.google.com/forum/#!forum/kubernetes-dev) `kubernetes-dev@googlegroups.com`
+  - *This provides read access to the node test images.*
+- Setup a [Google Cloud Platform](https://cloud.google.com/) account and project with Google Compute Engine enabled
+- Install and setup the [gcloud sdk](https://cloud.google.com/sdk/downloads)
+  - Verify the sdk is setup correctly by running `gcloud compute instances list` and `gcloud compute images list --project kubernetes-node-e2e-images`
+
+Run:
+
+```sh
+make test_e2e_node REMOTE=true
+```
+
+This will:
+- Build the Kubernetes source code
+- Create a new GCE instance using the default test image
+  - The instance will be called **test-e2e-node-containervm-v20160321-image**
+- Look up the instance's public IP address
+- Copy a compressed archive file to the host containing the following binaries:
+  - ginkgo
+  - kubelet
+  - kube-apiserver
+  - e2e_node.test (this binary contains the actual tests to be run)
+- Unzip the archive to a directory under **/tmp/gcloud**
+- Run the tests using the `ginkgo` command
+  - This starts etcd, kube-apiserver, and kubelet
+  - The ginkgo command is used because it supports more features than running the test binary directly
+- Output the remote test results to STDOUT
+- `scp` the log files back to the local host under /tmp/_artifacts/e2e-node-containervm-v20160321-image
+- Stop the processes on the remote host
+- **Leave the GCE instance running**
+
+**Note: Subsequent tests run using the same image will *reuse the existing host* instead of deleting it and
+provisioning a new one. To delete the GCE instance after each test see
+*[DELETE_INSTANCES](#delete-instance-after-tests-run)*.**
+
+# Additional Remote Options
+
+## Run tests using different images
+
+This is useful if you want to run tests against a host using a different OS distro or container runtime than
+provided by the default image.
+
+List the available test images using gcloud.
+
+```sh
+make test_e2e_node LIST_IMAGES=true
+```
+
+This will output a list of the available images for the default image project.
+
+Then run:
+
+```sh
+make test_e2e_node REMOTE=true IMAGES="<comma-separated-list-images>"
+```
+
+## Run tests against a running GCE instance (not an image)
+
+This is useful if you have a host instance running already and want to run the tests there instead of on a new instance.
+
+```sh
+make test_e2e_node REMOTE=true HOSTS="<comma-separated-list-of-hostnames>"
+```
+
+## Delete instance after tests run
+
+This is useful if you want to recreate the instance for each test run to trigger flakes related to starting the instance.
+
+```sh
+make test_e2e_node REMOTE=true DELETE_INSTANCES=true
+```
+
+## Keep instance, test binaries, and *processes* around after tests run
+
+This is useful if you want to manually inspect or debug the kubelet process run as part of the tests.
+
+```sh
+make test_e2e_node REMOTE=true CLEANUP=false
+```
+
+## Run tests using an image in another project
+
+This is useful if you want to create your own host image in another project and use it for testing.
+
+```sh
+make test_e2e_node REMOTE=true IMAGE_PROJECT="<name-of-project-with-images>" IMAGES="<image-name>"
+```
+
+Setting up your own host image may require additional steps, such as installing etcd or docker. See
+[setup_host.sh](../../test/e2e_node/environment/setup_host.sh) for common steps to set up hosts to run node tests.
+
+## Create instances using a different instance name prefix
+
+This is useful if you want to create instances using a different name so that you can run multiple copies of the
+test in parallel against different instances of the same image.
+
+```sh
+make test_e2e_node REMOTE=true INSTANCE_PREFIX="my-prefix"
+```
+
+# Additional Test Options for both Remote and Local execution
+
+## Only run a subset of the tests
+
+To run tests matching a regex:
+
+```sh
+make test_e2e_node REMOTE=true FOCUS="<regex-to-match>"
+```
+
+To run tests NOT matching a regex:
+
+```sh
+make test_e2e_node REMOTE=true SKIP="<regex-to-match>"
+```
+
+## Run tests continually until they fail
+
+This is useful if you are trying to debug a flaky test failure. This will cause ginkgo to run the tests continually
+until they fail. **Note: this will only perform test setup once (e.g. creating the instance) and is
+less useful for catching flakes related to creating the instance from an image.**
+
+```sh
+make test_e2e_node REMOTE=true RUN_UNTIL_FAILURE=true
+```
+
+# Notes on tests run by the Kubernetes project during pre- and post- submit
+
+The node e2e tests are run by the PR builder for each Pull Request, and the results are published at
+the bottom of the comments section. To re-run just the node e2e tests from the PR builder, add the comment
+`@k8s-bot node e2e test this issue: #<Flake-Issue-Number or IGNORE>` and **include a link to the test
+failure logs if caused by a flake.**
+
+The PR builder runs tests against the images listed in [jenkins-pull.properties](../../test/e2e_node/jenkins/jenkins-pull.properties)
+
+The post submit tests run against the images listed in [jenkins-ci.properties](../../test/e2e_node/jenkins/jenkins-ci.properties)

 <!-- BEGIN MUNGE: GENERATED_ANALYTICS -->

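Putting the remote options above together, a hypothetical run against two images with a custom instance prefix might look like the following; `my-custom-image` is a placeholder, not an image shipped with the project:

```sh
make test_e2e_node REMOTE=true \
    IMAGES="e2e-node-containervm-v20160321-image,my-custom-image" \
    INSTANCE_PREFIX="my-prefix" \
    FOCUS=kubelet RUN_UNTIL_FAILURE=true
```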
View File

@@ -20,6 +20,24 @@ source "${KUBE_ROOT}/hack/lib/init.sh"
 focus=${FOCUS:-""}
 skip=${SKIP:-""}
 report=${REPORT:-"/tmp/"}
+artifacts=${ARTIFACTS:-"/tmp/_artifacts"}
+remote=${REMOTE:-"false"}
+images=${IMAGES:-""}
+hosts=${HOSTS:-""}
+if [[ $hosts == "" && $images == "" ]]; then
+  images="e2e-node-containervm-v20160321-image"
+fi
+image_project=${IMAGE_PROJECT:-"kubernetes-node-e2e-images"}
+instance_prefix=${INSTANCE_PREFIX:-"test"}
+cleanup=${CLEANUP:-"true"}
+delete_instances=${DELETE_INSTANCES:-"false"}
+run_until_failure=${RUN_UNTIL_FAILURE:-"false"}
+list_images=${LIST_IMAGES:-"false"}
+if [[ $list_images == "true" ]]; then
+  gcloud compute images list --project="${image_project}" | grep "e2e-node"
+  exit 0
+fi

 ginkgo=$(kube::util::find-binary "ginkgo")
 if [[ -z "${ginkgo}" ]]; then
@@ -27,12 +45,90 @@ if [[ -z "${ginkgo}" ]]; then
   exit 1
 fi

-# Refresh sudo credentials if not running on GCE.
-if ! ping -c 1 -q metadata.google.internal &> /dev/null; then
-  sudo -v || exit 1
+if [ $remote = true ] ; then
+  # Setup the directory to copy test artifacts (logs, junit.xml, etc) from the remote host to the local host
+  if [ ! -d "${artifacts}" ]; then
+    echo "Creating artifacts directory at ${artifacts}"
+    mkdir -p ${artifacts}
+  fi
+  echo "Test artifacts will be written to ${artifacts}"
+
+  # Get the compute zone
+  zone=$(gcloud info --format='value(config.properties.compute.zone)')
+  if [[ $zone == "" ]]; then
+    echo "Could not find gcloud compute/zone when running: gcloud info --format='value(config.properties.compute.zone)'"
+    exit 1
+  fi
+
+  # Get the compute project
+  project=$(gcloud info --format='value(config.project)')
+  if [[ $project == "" ]]; then
+    echo "Could not find gcloud project when running: gcloud info --format='value(config.project)'"
+    exit 1
+  fi
+
+  # Check if any of the images specified already have running instances. If so, reuse those instances
+  # by moving the IMAGE to a HOST
+  if [[ $images != "" ]]; then
+    IFS=',' read -ra IM <<< "$images"
+    images=""
+    for i in "${IM[@]}"; do
+      if [[ $(gcloud compute instances list "${instance_prefix}-$i" | grep $i) ]]; then
+        if [[ $hosts != "" ]]; then
+          hosts="$hosts,"
+        fi
+        echo "Reusing host ${instance_prefix}-$i"
+        hosts="${hosts}${instance_prefix}-${i}"
+      else
+        if [[ $images != "" ]]; then
+          images="$images,"
+        fi
+        images="$images$i"
+      fi
+    done
+  fi
+
+  # Parse the flags to pass to ginkgo
+  ginkgoflags=""
+  if [[ $focus != "" ]]; then
+    ginkgoflags="$ginkgoflags -focus=$focus "
+  fi
+  if [[ $skip != "" ]]; then
+    ginkgoflags="$ginkgoflags -skip=$skip "
+  fi
+  if [[ $run_until_failure != "" ]]; then
+    ginkgoflags="$ginkgoflags -untilItFails=$run_until_failure "
+  fi
+
+  # Output the configuration we will try to run
+  echo "Running tests remotely using"
+  echo "Project: $project"
+  echo "Image Project: $image_project"
+  echo "Compute/Zone: $zone"
+  echo "Images: $images"
+  echo "Hosts: $hosts"
+  echo "Ginkgo Flags: $ginkgoflags"
+
+  # Invoke the runner
+  go run test/e2e_node/runner/run_e2e.go --logtostderr --vmodule=*=2 --ssh-env="gce" \
+    --zone="$zone" --project="$project" \
+    --hosts="$hosts" --images="$images" --cleanup="$cleanup" \
+    --results-dir="$artifacts" --ginkgo-flags="$ginkgoflags" \
+    --image-project="$image_project" --instance-name-prefix="$instance_prefix" --setup-node="true" \
+    --delete-instances="$delete_instances"
+  exit $?
+else
+  # Refresh sudo credentials if not running on GCE.
+  if ! ping -c 1 -q metadata.google.internal &> /dev/null; then
+    sudo -v || exit 1
+  fi
+
+  # Test using the host the script was run on
+  # Provided for backwards compatibility
+  "${ginkgo}" --focus=$focus --skip=$skip "${KUBE_ROOT}/test/e2e_node/" --report-dir=${report} \
+    -- --alsologtostderr --v 2 --node-name $(hostname) --build-services=true --start-services=true --stop-services=true
+  exit $?
 fi
-
-# Provided for backwards compatibility
-"${ginkgo}" --focus=$focus --skip=$skip "${KUBE_ROOT}/test/e2e_node/" --report-dir=${report} -- --alsologtostderr --v 2 --node-name $(hostname) --build-services=true --start-services=true --stop-services=true
-exit $?

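Because the Makefile target now simply delegates to this script, the script can also be driven directly through the same environment variables; a sketch, assuming it is invoked from the kubernetes root:

```sh
# Roughly equivalent to: make test_e2e_node REMOTE=true CLEANUP=false,
# with a custom local artifacts directory
REMOTE=true CLEANUP=false ARTIFACTS=/tmp/my-artifacts hack/e2e-node-test.sh
```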
View File

@@ -88,6 +88,7 @@ default-container-cpu-limit
 default-container-mem-limit
 delay-shutdown
 delete-collection-workers
+delete-instances
 delete-namespace
 deleting-pods-burst
 deleting-pods-qps

View File

@@ -120,7 +120,7 @@ func getK8sNodeTestDir() (string, error) {
 func getKubeletServerBin() string {
 	bin, err := getK8sBin("kubelet")
 	if err != nil {
-		panic(fmt.Sprintf("Could not locate kubelet binary."))
+		glog.Fatalf("Could not locate kubelet binary %v.", err)
 	}
 	return bin
 }
@@ -128,7 +128,7 @@ func getKubeletServerBin() string {
 func getApiServerBin() string {
 	bin, err := getK8sBin("kube-apiserver")
 	if err != nil {
-		panic(fmt.Sprintf("Could not locate kube-apiserver binary."))
+		glog.Fatalf("Could not locate kube-apiserver binary %v.", err)
 	}
 	return bin
 }

View File

@@ -26,6 +26,7 @@ import (
 	"os/user"
 	"path/filepath"
 	"strings"
+	"sync"

 	"github.com/golang/glog"
 	utilerrors "k8s.io/kubernetes/pkg/util/errors"
@@ -41,6 +42,11 @@ var sshOptionsMap map[string]string

 const archiveName = "e2e_node_test.tar.gz"

+var hostnameIpOverrides = struct {
+	sync.RWMutex
+	m map[string]string
+}{m: make(map[string]string)}
+
 func init() {
 	usr, err := user.Current()
 	if err != nil {
@@ -51,9 +57,24 @@ func init() {
 	}
 }

+func AddHostnameIp(hostname, ip string) {
+	hostnameIpOverrides.Lock()
+	defer hostnameIpOverrides.Unlock()
+	hostnameIpOverrides.m[hostname] = ip
+}
+
+func GetHostnameOrIp(hostname string) string {
+	hostnameIpOverrides.RLock()
+	defer hostnameIpOverrides.RUnlock()
+	if ip, found := hostnameIpOverrides.m[hostname]; found {
+		return ip
+	}
+	return hostname
+}
+
 // CreateTestArchive builds the local source and creates a tar archive e2e_node_test.tar.gz containing
 // the binaries k8s required for node e2e tests
-func CreateTestArchive() string {
+func CreateTestArchive() (string, error) {
 	// Build the executables
 	buildGo()
@@ -65,50 +86,57 @@ func CreateTestArchive() string {
 	ginkgoTest := filepath.Join(buildOutputDir, "e2e_node.test")
 	if _, err := os.Stat(ginkgoTest); err != nil {
-		glog.Fatalf("Failed to locate test binary %s", ginkgoTest)
+		return "", fmt.Errorf("failed to locate test binary %s", ginkgoTest)
 	}
 	kubelet := filepath.Join(buildOutputDir, "kubelet")
 	if _, err := os.Stat(kubelet); err != nil {
-		glog.Fatalf("Failed to locate binary %s", kubelet)
+		return "", fmt.Errorf("failed to locate binary %s", kubelet)
 	}
 	apiserver := filepath.Join(buildOutputDir, "kube-apiserver")
 	if _, err := os.Stat(apiserver); err != nil {
-		glog.Fatalf("Failed to locate binary %s", apiserver)
+		return "", fmt.Errorf("failed to locate binary %s", apiserver)
+	}
+	ginkgo := filepath.Join(buildOutputDir, "ginkgo")
+	if _, err := os.Stat(ginkgo); err != nil {
+		return "", fmt.Errorf("failed to locate binary %s", ginkgo)
 	}

 	glog.Infof("Building archive...")
 	tardir, err := ioutil.TempDir("", "node-e2e-archive")
 	if err != nil {
-		glog.Fatalf("Failed to create temporary directory %v.", err)
+		return "", fmt.Errorf("failed to create temporary directory %v.", err)
 	}
 	defer os.RemoveAll(tardir)

 	// Copy binaries
 	out, err := exec.Command("cp", ginkgoTest, filepath.Join(tardir, "e2e_node.test")).CombinedOutput()
 	if err != nil {
-		glog.Fatalf("Failed to copy e2e_node.test %v.", err)
+		return "", fmt.Errorf("failed to copy e2e_node.test %v.", err)
 	}
 	out, err = exec.Command("cp", kubelet, filepath.Join(tardir, "kubelet")).CombinedOutput()
 	if err != nil {
-		glog.Fatalf("Failed to copy kubelet %v.", err)
+		return "", fmt.Errorf("failed to copy kubelet %v.", err)
 	}
 	out, err = exec.Command("cp", apiserver, filepath.Join(tardir, "kube-apiserver")).CombinedOutput()
 	if err != nil {
-		glog.Fatalf("Failed to copy kube-apiserver %v.", err)
+		return "", fmt.Errorf("failed to copy kube-apiserver %v.", err)
+	}
+	out, err = exec.Command("cp", ginkgo, filepath.Join(tardir, "ginkgo")).CombinedOutput()
+	if err != nil {
+		return "", fmt.Errorf("failed to copy ginkgo %v.", err)
 	}

 	// Build the tar
 	out, err = exec.Command("tar", "-zcvf", archiveName, "-C", tardir, ".").CombinedOutput()
 	if err != nil {
-		glog.Fatalf("Failed to build tar %v. Output:\n%s", err, out)
+		return "", fmt.Errorf("failed to build tar %v. Output:\n%s", err, out)
 	}

 	dir, err := os.Getwd()
 	if err != nil {
-		glog.Fatalf("Failed to get working directory %v.", err)
+		return "", fmt.Errorf("failed to get working directory %v.", err)
 	}
-	return filepath.Join(dir, archiveName)
+	return filepath.Join(dir, archiveName), nil
 }

 // Returns the command output, whether the exit was ok, and any errors
@@ -118,31 +146,31 @@ func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, setupNode bool) (string, bool, error) {
 		if err != nil {
 			return "", false, fmt.Errorf("could not find username: %v", err)
 		}
-		output, err := RunSshCommand("ssh", host, "--", "sudo", "usermod", "-a", "-G", "docker", uname.Username)
+		output, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sudo", "usermod", "-a", "-G", "docker", uname.Username)
 		if err != nil {
-			return "", false, fmt.Errorf("Instance %s not running docker daemon - Command failed: %s", host, output)
+			return "", false, fmt.Errorf("instance %s not running docker daemon - Command failed: %s", host, output)
 		}
 	}

 	// Create the temp staging directory
 	glog.Infof("Staging test binaries on %s", host)
 	tmp := fmt.Sprintf("/tmp/gcloud-e2e-%d", rand.Int31())
-	_, err := RunSshCommand("ssh", host, "--", "mkdir", tmp)
+	_, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "mkdir", tmp)
 	if err != nil {
 		// Exit failure with the error
 		return "", false, err
 	}

 	if cleanup {
 		defer func() {
-			output, err := RunSshCommand("ssh", host, "--", "rm", "-rf", tmp)
+			output, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "rm", "-rf", tmp)
 			if err != nil {
-				glog.Errorf("Failed to cleanup tmp directory %s on host %v. Output:\n%s", tmp, err, output)
+				glog.Errorf("failed to cleanup tmp directory %s on host %v. Output:\n%s", tmp, err, output)
 			}
 		}()
 	}

 	// Copy the archive to the staging directory
-	_, err = RunSshCommand("scp", archive, fmt.Sprintf("%s:%s/", host, tmp))
+	_, err = RunSshCommand("scp", archive, fmt.Sprintf("%s:%s/", GetHostnameOrIp(host), tmp))
 	if err != nil {
 		// Exit failure with the error
 		return "", false, err
@@ -158,12 +186,12 @@ func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, setupNode bool) (string, bool, error) {
 	// If we are unable to stop existing running k8s processes, we should see messages in the kubelet/apiserver/etcd
 	// logs about failing to bind the required ports.
 	glog.Infof("Killing any existing node processes on %s", host)
-	RunSshCommand("ssh", host, "--", "sh", "-c", cmd)
+	RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd)

 	// Extract the archive
 	cmd = getSshCommand(" && ", fmt.Sprintf("cd %s", tmp), fmt.Sprintf("tar -xzvf ./%s", archiveName))
 	glog.Infof("Extracting tar on %s", host)
-	output, err := RunSshCommand("ssh", host, "--", "sh", "-c", cmd)
+	output, err := RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd)
 	if err != nil {
 		// Exit failure with the error
 		return "", false, err
@@ -172,12 +200,13 @@ func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, setupNode bool) (string, bool, error) {
 	// Run the tests
 	cmd = getSshCommand(" && ",
 		fmt.Sprintf("cd %s", tmp),
-		fmt.Sprintf("timeout -k 30s %ds ./e2e_node.test --logtostderr --v 2 --build-services=false --stop-services=%t --node-name=%s --report-dir=%s/results --junit-file-number=%d %s", *testTimeoutSeconds, cleanup, host, tmp, junitFileNumber, *ginkgoFlags),
+		fmt.Sprintf("timeout -k 30s %ds ./ginkgo %s ./e2e_node.test -- --logtostderr --v 2 --build-services=false --stop-services=%t --node-name=%s --report-dir=%s/results --junit-file-number=%d", *testTimeoutSeconds, *ginkgoFlags, cleanup, host, tmp, junitFileNumber),
 	)
 	aggErrs := []error{}

 	glog.Infof("Starting tests on %s", host)
-	output, err = RunSshCommand("ssh", host, "--", "sh", "-c", cmd)
+	output, err = RunSshCommand("ssh", GetHostnameOrIp(host), "--", "sh", "-c", cmd)
 	if err != nil {
 		aggErrs = append(aggErrs, err)
 	}
@@ -195,13 +224,13 @@ func RunRemote(archive string, host string, cleanup bool, junitFileNumber int, setupNode bool) (string, bool, error) {
 	}

 func getTestArtifacts(host, testDir string) error {
-	_, err := RunSshCommand("scp", "-r", fmt.Sprintf("%s:%s/results/", host, testDir), fmt.Sprintf("%s/%s", *resultsDir, host))
+	_, err := RunSshCommand("scp", "-r", fmt.Sprintf("%s:%s/results/", GetHostnameOrIp(host), testDir), fmt.Sprintf("%s/%s", *resultsDir, host))
 	if err != nil {
 		return err
 	}

 	// Copy junit to the top of artifacts
-	_, err = RunSshCommand("scp", fmt.Sprintf("%s:%s/results/junit*", host, testDir), fmt.Sprintf("%s/", *resultsDir))
+	_, err = RunSshCommand("scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIp(host), testDir), fmt.Sprintf("%s/", *resultsDir))
 	if err != nil {
 		return err
 	}
@@ -223,7 +252,7 @@ func RunSshCommand(cmd string, args ...string) (string, error) {
 	}
 	output, err := exec.Command(cmd, args...).CombinedOutput()
 	if err != nil {
-		return fmt.Sprintf("%s", output), fmt.Errorf("Command [%s %s] failed with error: %v and output:\n%s", cmd, strings.Join(args, " "), err, output)
+		return fmt.Sprintf("%s", output), fmt.Errorf("command [%s %s] failed with error: %v and output:\n%s", cmd, strings.Join(args, " "), err, output)
 	}
 	return fmt.Sprintf("%s", output), nil
 }

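The command string assembled in RunRemote above expands to roughly the following on the remote host; the temp directory, timeout, and ginkgo flags shown here are illustrative values only:

```sh
cd /tmp/gcloud-e2e-12345 && \
    timeout -k 30s 3600s ./ginkgo -skip=FLAKY ./e2e_node.test -- \
        --logtostderr --v 2 --build-services=false --stop-services=true \
        --node-name=test-e2e-node-containervm-v20160321-image \
        --report-dir=/tmp/gcloud-e2e-12345/results --junit-file-number=1
```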
View File

@@ -29,7 +29,10 @@ set -x
 . $1

 go build test/e2e_node/environment/conformance.go

+WORKSPACE=${WORKSPACE:-"/tmp/"}
 ARTIFACTS=${WORKSPACE}/_artifacts
 mkdir -p ${ARTIFACTS}

 go run test/e2e_node/runner/run_e2e.go --logtostderr --vmodule=*=2 --ssh-env="gce" \
   --zone="$GCE_ZONE" --project="$GCE_PROJECT" --image-project="$GCE_IMAGE_PROJECT" \

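The script sources the properties file passed as its first argument (`. $1`), so a manual run from the kubernetes root might look like the following sketch; the template path is the one referenced in the docs, and WORKSPACE can now be omitted thanks to the new default:

```sh
WORKSPACE=/tmp test/e2e_node/jenkins/e2e-node-jenkins.sh \
    test/e2e_node/jenkins/template.properties
```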
View File

@@ -11,6 +11,6 @@ GCE_ZONE=us-central1-f
 GCE_PROJECT=kubernetes-jenkins
 GCE_IMAGE_PROJECT=kubernetes-jenkins
 CLEANUP=true
-GINKGO_FLAGS=--ginkgo.skip=FLAKY
+GINKGO_FLAGS=--skip=FLAKY
 SETUP_NODE=false

View File

@@ -11,5 +11,5 @@ GCE_ZONE=us-central1-f
 GCE_PROJECT=kubernetes-jenkins-pull
 GCE_IMAGE_PROJECT=kubernetes-jenkins-pull
 CLEANUP=true
-GINKGO_FLAGS=--ginkgo.skip=FLAKY
+GINKGO_FLAGS=--skip=FLAKY
 SETUP_NODE=false

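Both properties files are plain shell variable assignments consumed by the jenkins script. GINKGO_FLAGS changes from `--ginkgo.skip` to `--skip` because the runner now passes these flags to the `ginkgo` binary itself rather than to the test binary. A minimal file in the same format might look like this (project and image values are placeholders):

```sh
GCE_HOSTS=
GCE_IMAGES=my-test-image
GCE_ZONE=us-central1-f
GCE_PROJECT=my-gce-project
GCE_IMAGE_PROJECT=my-gce-project
CLEANUP=true
GINKGO_FLAGS=--skip=FLAKY
SETUP_NODE=false
```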
View File

@@ -27,6 +27,7 @@ import (
 	"net/http"
 	"os"
 	"strings"
+	"sync"
 	"time"

 	"k8s.io/kubernetes/test/e2e_node"
@@ -45,11 +46,20 @@ var imageProject = flag.String("image-project", "", "gce project the hosts live in")
 var images = flag.String("images", "", "images to test")
 var hosts = flag.String("hosts", "", "hosts to test")
 var cleanup = flag.Bool("cleanup", true, "If true remove files from remote hosts and delete temporary instances")
+var deleteInstances = flag.Bool("delete-instances", true, "If true, delete any instances created")
 var buildOnly = flag.Bool("build-only", false, "If true, build e2e_node_test.tar.gz and exit.")
 var setupNode = flag.Bool("setup-node", false, "When true, current user will be added to docker group on the test machine")

 var computeService *compute.Service

+type Archive struct {
+	sync.Once
+	path string
+	err  error
+}
+
+var arc Archive
+
 type TestResult struct {
 	output string
 	err    error
@@ -94,35 +104,22 @@ func main() {
 		noColour = "\033[0m"
 	}

-	archive := e2e_node.CreateTestArchive()
-	defer os.Remove(archive)
+	go arc.getArchive()
+	defer arc.deleteArchive()
+
+	var err error
+	computeService, err = getComputeClient()
+	if err != nil {
+		glog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. %v", err)
+	}

 	results := make(chan *TestResult)
 	running := 0
 	if *images != "" {
-		// Setup the gce client for provisioning instances
-		// Getting credentials on gce jenkins is flaky, so try a couple times
-		var err error
-		for i := 0; i < 10; i++ {
-			var client *http.Client
-			client, err = google.DefaultClient(oauth2.NoContext, compute.ComputeScope)
-			if err != nil {
-				continue
-			}
-			computeService, err = compute.New(client)
-			if err != nil {
-				continue
-			}
-			time.Sleep(time.Second * 6)
-		}
-		if err != nil {
-			glog.Fatalf("Unable to create gcloud compute service using defaults. Make sure you are authenticated. %v", err)
-		}
 		for _, image := range strings.Split(*images, ",") {
 			running++
 			fmt.Printf("Initializing e2e tests using image %s.\n", image)
-			go func(image string, junitFileNum int) { results <- testImage(image, archive, junitFileNum) }(image, running)
+			go func(image string, junitFileNum int) { results <- testImage(image, junitFileNum) }(image, running)
 		}
 	}
 	if *hosts != "" {
@@ -130,7 +127,7 @@ func main() {
 		fmt.Printf("Initializing e2e tests using host %s.\n", host)
 		running++
 		go func(host string, junitFileNum int) {
-			results <- testHost(host, archive, *cleanup, junitFileNum, *setupNode)
+			results <- testHost(host, *cleanup, junitFileNum, *setupNode)
 		}(host, running)
 	}
 }
@@ -159,9 +156,51 @@ func main() {
 	}
 }

+func (a *Archive) getArchive() (string, error) {
+	a.Do(func() { a.path, a.err = e2e_node.CreateTestArchive() })
+	return a.path, a.err
+}
+
+func (a *Archive) deleteArchive() {
+	path, err := a.getArchive()
+	if err != nil {
+		return
+	}
+	os.Remove(path)
+}
+
 // Run tests in archive against host
-func testHost(host, archive string, deleteFiles bool, junitFileNum int, setupNode bool) *TestResult {
-	output, exitOk, err := e2e_node.RunRemote(archive, host, deleteFiles, junitFileNum, setupNode)
+func testHost(host string, deleteFiles bool, junitFileNum int, setupNode bool) *TestResult {
+	instance, err := computeService.Instances.Get(*project, *zone, host).Do()
+	if err != nil {
+		return &TestResult{
+			err:    err,
+			host:   host,
+			exitOk: false,
+		}
+	}
+	if strings.ToUpper(instance.Status) != "RUNNING" {
+		err = fmt.Errorf("instance %s not in state RUNNING, was %s.", host, instance.Status)
+		return &TestResult{
+			err:    err,
+			host:   host,
+			exitOk: false,
+		}
+	}
+	externalIp := getExternalIp(instance)
+	if len(externalIp) > 0 {
+		e2e_node.AddHostnameIp(host, externalIp)
+	}
+
+	path, err := arc.getArchive()
+	if err != nil {
+		// Don't log fatal because we need to do any needed cleanup contained in "defer" statements
+		return &TestResult{
+			err: fmt.Errorf("unable to create test archive %v.", err),
+		}
+	}
+
+	output, exitOk, err := e2e_node.RunRemote(path, host, deleteFiles, junitFileNum, setupNode)
 	return &TestResult{
 		output: output,
 		err:    err,
@@ -172,17 +211,21 @@ func testHost(host, archive string, deleteFiles bool, junitFileNum int, setupNode bool) *TestResult {

 // Provision a gce instance using image and run the tests in archive against the instance.
 // Delete the instance afterward.
-func testImage(image, archive string, junitFileNum int) *TestResult {
+func testImage(image string, junitFileNum int) *TestResult {
 	host, err := createInstance(image)
-	if *cleanup {
+	if *deleteInstances {
 		defer deleteInstance(image)
 	}
 	if err != nil {
 		return &TestResult{
-			err: fmt.Errorf("Unable to create gce instance with running docker daemon for image %s. %v", image, err),
+			err: fmt.Errorf("unable to create gce instance with running docker daemon for image %s. %v", image, err),
 		}
 	}
-	return testHost(host, archive, false, junitFileNum, *setupNode)
+
+	// Only delete the files if we are keeping the instance and want it cleaned up.
+	// If we are going to delete the instance, don't bother with cleaning up the files
+	deleteFiles := !*deleteInstances && *cleanup
+	return testHost(host, deleteFiles, junitFileNum, *setupNode)
 }

 // Provision a gce instance using image
@@ -216,7 +259,7 @@ func createInstance(image string) (string, error) {
 		return "", err
 	}
 	if op.Error != nil {
-		return "", fmt.Errorf("Could not create instance %s: %+v", name, op.Error)
+		return "", fmt.Errorf("could not create instance %s: %+v", name, op.Error)
 	}

 	instanceRunning := false
@@ -230,17 +273,21 @@ func createInstance(image string) (string, error) {
 			continue
 		}
 		if strings.ToUpper(instance.Status) != "RUNNING" {
-			err = fmt.Errorf("Instance %s not in state RUNNING, was %s.", name, instance.Status)
+			err = fmt.Errorf("instance %s not in state RUNNING, was %s.", name, instance.Status)
 			continue
 		}
+		externalIp := getExternalIp(instance)
+		if len(externalIp) > 0 {
+			e2e_node.AddHostnameIp(name, externalIp)
+		}
 		var output string
-		output, err = e2e_node.RunSshCommand("ssh", name, "--", "sudo", "docker", "version")
+		output, err = e2e_node.RunSshCommand("ssh", e2e_node.GetHostnameOrIp(name), "--", "sudo", "docker", "version")
 		if err != nil {
-			err = fmt.Errorf("Instance %s not running docker daemon - Command failed: %s", name, output)
+			err = fmt.Errorf("instance %s not running docker daemon - Command failed: %s", name, output)
 			continue
 		}
 		if !strings.Contains(output, "Server") {
-			err = fmt.Errorf("Instance %s not running docker daemon - Server not found: %s", name, output)
+			err = fmt.Errorf("instance %s not running docker daemon - Server not found: %s", name, output)
 			continue
 		}
 		instanceRunning = true
@@ -248,6 +295,47 @@ func createInstance(image string) (string, error) {
 	return name, err
 }

+func getExternalIp(instance *compute.Instance) string {
+	for i := range instance.NetworkInterfaces {
+		ni := instance.NetworkInterfaces[i]
+		for j := range ni.AccessConfigs {
+			ac := ni.AccessConfigs[j]
+			if len(ac.NatIP) > 0 {
+				return ac.NatIP
+			}
+		}
+	}
+	return ""
+}
+
+func getComputeClient() (*compute.Service, error) {
+	const retries = 10
+	const backoff = time.Second * 6
+
+	// Setup the gce client for provisioning instances
+	// Getting credentials on gce jenkins is flaky, so try a couple times
+	var err error
+	var cs *compute.Service
+	for i := 0; i < retries; i++ {
+		if i > 0 {
+			time.Sleep(backoff)
+		}
+
+		var client *http.Client
+		client, err = google.DefaultClient(oauth2.NoContext, compute.ComputeScope)
+		if err != nil {
+			continue
+		}
+
+		cs, err = compute.New(client)
+		if err != nil {
+			continue
+		}
+		return cs, nil
+	}
+	return nil, err
+}
+
 func deleteInstance(image string) {
 	_, err := computeService.Instances.Delete(*project, *zone, imageToInstanceName(image)).Do()
 	if err != nil {
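As the docs above note, the runner can also be invoked directly against existing hosts, bypassing make and the wrapper script; the hostnames here are placeholders, and passwordless ssh and sudo access to the hosts are required:

```sh
go run test/e2e_node/runner/run_e2e.go --logtostderr --vmodule=*=2 \
    --hosts="host-a,host-b" --cleanup=true --setup-node=false
```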