let load and density e2e tests use GC if it's on

2016-08-08 17:11:21 -07:00 · 2016-08-08 17:11:21 -07:00 · 3688a78769
parent faffbe4e18
commit 3688a78769
16 changed files with 83 additions and 29 deletions
--- a/cluster/kubemark/config-default.sh
+++ b/cluster/kubemark/config-default.sh
@ -63,9 +63,11 @@ TEST_CLUSTER_API_CONTENT_TYPE="${TEST_CLUSTER_API_CONTENT_TYPE:-}"
 # ContentType used to store objects in underlying database.
 TEST_CLUSTER_STORAGE_CONTENT_TYPE="${TEST_CLUSTER_STORAGE_CONTENT_TYPE:-}"

+ENABLE_GARBAGE_COLLECTOR=${ENABLE_GARBAGE_COLLECTOR:-false}
+
 KUBELET_TEST_ARGS="--max-pods=100 $TEST_CLUSTER_LOG_LEVEL ${TEST_CLUSTER_API_CONTENT_TYPE}"
-APISERVER_TEST_ARGS="--runtime-config=extensions/v1beta1 ${API_SERVER_TEST_LOG_LEVEL} ${TEST_CLUSTER_STORAGE_CONTENT_TYPE} ${TEST_CLUSTER_DELETE_COLLECTION_WORKERS}"
-CONTROLLER_MANAGER_TEST_ARGS="${CONTROLLER_MANAGER_TEST_LOG_LEVEL} ${TEST_CLUSTER_RESYNC_PERIOD} ${TEST_CLUSTER_API_CONTENT_TYPE} ${KUBEMARK_MASTER_COMPONENTS_QPS_LIMITS}"
+APISERVER_TEST_ARGS="--runtime-config=extensions/v1beta1 ${API_SERVER_TEST_LOG_LEVEL} ${TEST_CLUSTER_STORAGE_CONTENT_TYPE} ${TEST_CLUSTER_DELETE_COLLECTION_WORKERS} --enable-garbage-collector=${ENABLE_GARBAGE_COLLECTOR}"
+CONTROLLER_MANAGER_TEST_ARGS="${CONTROLLER_MANAGER_TEST_LOG_LEVEL} ${TEST_CLUSTER_RESYNC_PERIOD} ${TEST_CLUSTER_API_CONTENT_TYPE} ${KUBEMARK_MASTER_COMPONENTS_QPS_LIMITS} --enable-garbage-collector=${ENABLE_GARBAGE_COLLECTOR}"
 SCHEDULER_TEST_ARGS="${SCHEDULER_TEST_LOG_LEVEL} ${TEST_CLUSTER_API_CONTENT_TYPE} ${KUBEMARK_MASTER_COMPONENTS_QPS_LIMITS}"
 KUBEPROXY_TEST_ARGS="${KUBEPROXY_TEST_LOG_LEVEL} ${TEST_CLUSTER_API_CONTENT_TYPE}"

--- a/hack/verify-flags/known-flags.txt
+++ b/hack/verify-flags/known-flags.txt
@ -498,3 +498,4 @@ whitelist-override-label
 windows-line-endings
 www-prefix
 zone-name
+garbage-collector-enabled
--- a/test/e2e/autoscaling_utils.go
+++ b/test/e2e/autoscaling_utils.go
@ -302,9 +302,9 @@ func (rc *ResourceConsumer) CleanUp() {
 	rc.stopCustomMetric <- 0
 	// Wait some time to ensure all child goroutines are finished.
 	time.Sleep(10 * time.Second)
-	framework.ExpectNoError(framework.DeleteRC(rc.framework.Client, rc.framework.Namespace.Name, rc.name))
+	framework.ExpectNoError(framework.DeleteRCAndPods(rc.framework.Client, rc.framework.Namespace.Name, rc.name))
 	framework.ExpectNoError(rc.framework.Client.Services(rc.framework.Namespace.Name).Delete(rc.name))
-	framework.ExpectNoError(framework.DeleteRC(rc.framework.Client, rc.framework.Namespace.Name, rc.controllerName))
+	framework.ExpectNoError(framework.DeleteRCAndPods(rc.framework.Client, rc.framework.Namespace.Name, rc.controllerName))
 	framework.ExpectNoError(rc.framework.Client.Services(rc.framework.Namespace.Name).Delete(rc.controllerName))
 }

--- a/test/e2e/cluster_size_autoscaling.go
+++ b/test/e2e/cluster_size_autoscaling.go
@ -97,7 +97,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaling [Slow]", func() {
 	It("shouldn't increase cluster size if pending pod is too large [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		By("Creating unschedulable pod")
 		ReserveMemory(f, "memory-reservation", 1, memCapacityMb, false)
-		defer framework.DeleteRC(f.Client, f.Namespace.Name, "memory-reservation")
+		defer framework.DeleteRCAndPods(f.Client, f.Namespace.Name, "memory-reservation")

 		By("Waiting for scale up hoping it won't happen")
 		// Verify that the appropriate event was generated.
@ -124,7 +124,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaling [Slow]", func() {

 	It("should increase cluster size if pending pods are small [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		ReserveMemory(f, "memory-reservation", 100, nodeCount*memCapacityMb, false)
-		defer framework.DeleteRC(f.Client, f.Namespace.Name, "memory-reservation")
+		defer framework.DeleteRCAndPods(f.Client, f.Namespace.Name, "memory-reservation")

 		// Verify, that cluster size is increased
 		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
@ -143,7 +143,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaling [Slow]", func() {
 		glog.Infof("Not enabling cluster autoscaler for the node pool (on purpose).")

 		ReserveMemory(f, "memory-reservation", 100, nodeCount*memCapacityMb, false)
-		defer framework.DeleteRC(f.Client, f.Namespace.Name, "memory-reservation")
+		defer framework.DeleteRCAndPods(f.Client, f.Namespace.Name, "memory-reservation")

 		// Verify, that cluster size is increased
 		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
@ -165,7 +165,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaling [Slow]", func() {

 	It("should increase cluster size if pods are pending due to host port conflict [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		CreateHostPortPods(f, "host-port", nodeCount+2, false)
-		defer framework.DeleteRC(f.Client, f.Namespace.Name, "host-port")
+		defer framework.DeleteRCAndPods(f.Client, f.Namespace.Name, "host-port")

 		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
 			func(size int) bool { return size >= nodeCount+2 }, scaleUpTimeout))
@ -217,7 +217,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaling [Slow]", func() {
 			func(size int) bool { return size >= nodeCount+1 }, scaleUpTimeout))

 		framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))
-		framework.ExpectNoError(framework.DeleteRC(f.Client, f.Namespace.Name, "node-selector"))
+		framework.ExpectNoError(framework.DeleteRCAndPods(f.Client, f.Namespace.Name, "node-selector"))
 	})

 	It("should scale up correct target pool [Feature:ClusterSizeAutoscalingScaleUp]", func() {
@ -232,7 +232,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaling [Slow]", func() {

 		By("Creating rc with 2 pods too big to fit default-pool but fitting extra-pool")
 		ReserveMemory(f, "memory-reservation", 2, 2*memCapacityMb, false)
-		defer framework.DeleteRC(f.Client, f.Namespace.Name, "memory-reservation")
+		defer framework.DeleteRCAndPods(f.Client, f.Namespace.Name, "memory-reservation")

 		// Apparently GKE master is restarted couple minutes after the node pool is added
 		// resetting all the timers in scale down code. Adding 5 extra minutes to workaround
--- a/test/e2e/density.go
+++ b/test/e2e/density.go
@ -319,9 +319,15 @@ func cleanupDensityTest(dtc DensityTestConfig) {
 		rcName := dtc.Configs[i].Name
 		rc, err := dtc.Client.ReplicationControllers(dtc.Namespace).Get(rcName)
 		if err == nil && rc.Spec.Replicas != 0 {
-			By("Cleaning up the replication controller")
-			err := framework.DeleteRC(dtc.Client, dtc.Namespace, rcName)
-			framework.ExpectNoError(err)
+			if framework.TestContext.GarbageCollectorEnabled {
+				By("Cleaning up only the replication controller, garbage collector will clean up the pods")
+				err := framework.DeleteRCAndWaitForGC(dtc.Client, dtc.Namespace, rcName)
+				framework.ExpectNoError(err)
+			} else {
+				By("Cleaning up the replication controller and pods")
+				err := framework.DeleteRCAndPods(dtc.Client, dtc.Namespace, rcName)
+				framework.ExpectNoError(err)
+			}
 		}
 	}
 }
--- a/test/e2e/framework/test_context.go
+++ b/test/e2e/framework/test_context.go
@ -73,6 +73,8 @@ type TestContextType struct {
 	CgroupsPerQOS bool
 	// The hard eviction thresholds
 	EvictionHard string
+	// Whether the garbage collector is enabled in the kube-apiserver and kube-controller-manager.
+	GarbageCollectorEnabled bool
 }

 type CloudConfig struct {
@ -110,6 +112,7 @@ func RegisterCommonFlags() {
 	flag.StringVar(&TestContext.Host, "host", "http://127.0.0.1:8080", "The host, or apiserver, to connect to")
 	flag.StringVar(&TestContext.ReportPrefix, "report-prefix", "", "Optional prefix for JUnit XML reports. Default is empty, which doesn't prepend anything to the default name.")
 	flag.StringVar(&TestContext.ReportDir, "report-dir", "", "Path to the directory where the JUnit XML reports should be saved. Default is empty, which doesn't generate these reports.")
+	flag.BoolVar(&TestContext.GarbageCollectorEnabled, "garbage-collector-enabled", false, "Set to true if the garbage collector is enabled in the kube-apiserver and kube-controller-manager, then some tests will rely on the garbage collector to delete dependent resources.")
 }

 // Register flags specific to the cluster e2e test suite.
--- a/test/e2e/framework/util.go
+++ b/test/e2e/framework/util.go
@ -3097,8 +3097,8 @@ func WaitForPodsWithLabel(c *client.Client, ns string, label labels.Selector) (p
 	return
 }

-// Delete a Replication Controller and all pods it spawned
-func DeleteRC(c *client.Client, ns, name string) error {
+// DeleteRCAndPods deletes a Replication Controller and all pods it spawned
+func DeleteRCAndPods(c *client.Client, ns, name string) error {
 	By(fmt.Sprintf("deleting replication controller %s in namespace %s", name, ns))
 	rc, err := c.ReplicationControllers(ns).Get(name)
 	if err != nil {
@ -3127,7 +3127,41 @@ func DeleteRC(c *client.Client, ns, name string) error {
 	if err != nil {
 		return fmt.Errorf("error while stopping RC: %s: %v", name, err)
 	}
-	err = waitForRCPodsGone(c, rc)
+	err = waitForRCPodsGone(c, rc, nil)
+	if err != nil {
+		return fmt.Errorf("error while deleting RC %s: %v", name, err)
+	}
+	terminatePodTime := time.Now().Sub(startTime) - deleteRCTime
+	Logf("Terminating RC %s pods took: %v", name, terminatePodTime)
+	return nil
+}
+
+// DeleteRCAndWaitForGC deletes only the Replication Controller and waits for GC to delete the pods.
+func DeleteRCAndWaitForGC(c *client.Client, ns, name string) error {
+	By(fmt.Sprintf("deleting replication controller %s in namespace %s, will wait for the garbage collector to delete the pods", name, ns))
+	rc, err := c.ReplicationControllers(ns).Get(name)
+	if err != nil {
+		if apierrs.IsNotFound(err) {
+			Logf("RC %s was already deleted: %v", name, err)
+			return nil
+		}
+		return err
+	}
+	startTime := time.Now()
+	falseVar := false
+	deleteOption := &api.DeleteOptions{OrphanDependents: &falseVar}
+	err = c.ReplicationControllers(ns).Delete(name, deleteOption)
+	if err != nil && apierrs.IsNotFound(err) {
+		Logf("RC %s was already deleted: %v", name, err)
+		return nil
+	}
+	if err != nil {
+		return err
+	}
+	deleteRCTime := time.Now().Sub(startTime)
+	Logf("Deleting RC %s took: %v", name, deleteRCTime)
+	timeout := 10 * time.Minute
+	err = waitForRCPodsGone(c, rc, &timeout)
 	if err != nil {
 		return fmt.Errorf("error while deleting RC %s: %v", name, err)
 	}
@ -3138,12 +3172,16 @@ func DeleteRC(c *client.Client, ns, name string) error {

 // waitForRCPodsGone waits until there are no pods reported under an RC's selector (because the pods
 // have completed termination).
-func waitForRCPodsGone(c *client.Client, rc *api.ReplicationController) error {
+func waitForRCPodsGone(c *client.Client, rc *api.ReplicationController, timeout *time.Duration) error {
+	if timeout == nil {
+		defaultTimeout := 2 * time.Minute
+		timeout = &defaultTimeout
+	}
 	labels := labels.SelectorFromSet(rc.Spec.Selector)
 	PodStore := NewPodStore(c, rc.Namespace, labels, fields.Everything())
 	defer PodStore.Stop()

-	return wait.PollImmediate(Poll, 2*time.Minute, func() (bool, error) {
+	return wait.PollImmediate(Poll, *timeout, func() (bool, error) {
 		if pods := PodStore.List(); len(pods) == 0 {
 			return true, nil
 		}
@ -4306,7 +4344,7 @@ func ScaleRCByLabels(client *client.Client, ns string, l map[string]string, repl
 			return err
 		}
 		if replicas == 0 {
-			if err := waitForRCPodsGone(client, rc); err != nil {
+			if err := waitForRCPodsGone(client, rc, nil); err != nil {
 				return err
 			}
 		} else {
--- a/test/e2e/kubelet.go
+++ b/test/e2e/kubelet.go
@ -205,7 +205,7 @@ var _ = framework.KubeDescribe("kubelet", func() {
 				}

 				By("Deleting the RC")
-				framework.DeleteRC(f.Client, f.Namespace.Name, rcName)
+				framework.DeleteRCAndPods(f.Client, f.Namespace.Name, rcName)
 				// Check that the pods really are gone by querying /runningpods on the
 				// node. The /runningpods handler checks the container runtime (or its
 				// cache) and  returns a list of running pods. Some possible causes of
--- a/test/e2e/kubelet_perf.go
+++ b/test/e2e/kubelet_perf.go
@ -115,7 +115,7 @@ func runResourceTrackingTest(f *framework.Framework, podsPerNode int, nodeNames
 	verifyCPULimits(expectedCPU, cpuSummary)

 	By("Deleting the RC")
-	framework.DeleteRC(f.Client, f.Namespace.Name, rcName)
+	framework.DeleteRCAndPods(f.Client, f.Namespace.Name, rcName)
 }

 func verifyMemoryLimits(c *client.Client, expected framework.ResourceUsagePerContainer, actual framework.ResourceUsagePerNode) {
--- a/test/e2e/load.go
+++ b/test/e2e/load.go
@ -339,5 +339,9 @@ func deleteRC(wg *sync.WaitGroup, config *framework.RCConfig, deletingTime time.
 	defer wg.Done()

 	sleepUpTo(deletingTime)
-	framework.ExpectNoError(framework.DeleteRC(config.Client, config.Namespace, config.Name), fmt.Sprintf("deleting rc %s", config.Name))
+	if framework.TestContext.GarbageCollectorEnabled {
+		framework.ExpectNoError(framework.DeleteRCAndWaitForGC(config.Client, config.Namespace, config.Name), fmt.Sprintf("deleting rc %s", config.Name))
+	} else {
+		framework.ExpectNoError(framework.DeleteRCAndPods(config.Client, config.Namespace, config.Name), fmt.Sprintf("deleting rc %s", config.Name))
+	}
 }
--- a/test/e2e/proxy.go
+++ b/test/e2e/proxy.go
@ -154,7 +154,7 @@ func proxyContext(version string) {
 			CreatedPods: &pods,
 		}
 		Expect(framework.RunRC(cfg)).NotTo(HaveOccurred())
-		defer framework.DeleteRC(f.Client, f.Namespace.Name, cfg.Name)
+		defer framework.DeleteRCAndPods(f.Client, f.Namespace.Name, cfg.Name)

 		Expect(f.WaitForAnEndpoint(service.Name)).NotTo(HaveOccurred())

--- a/test/e2e/rc.go
+++ b/test/e2e/rc.go
@ -86,7 +86,7 @@ func ServeImageOrFail(f *framework.Framework, test string, image string) {
 	// Cleanup the replication controller when we are done.
 	defer func() {
 		// Resize the replication controller to zero to get rid of pods.
-		if err := framework.DeleteRC(f.Client, f.Namespace.Name, controller.Name); err != nil {
+		if err := framework.DeleteRCAndPods(f.Client, f.Namespace.Name, controller.Name); err != nil {
 			framework.Logf("Failed to cleanup replication controller %v: %v.", controller.Name, err)
 		}
 	}()
--- a/test/e2e/scheduler_predicates.go
+++ b/test/e2e/scheduler_predicates.go
@ -187,7 +187,7 @@ var _ = framework.KubeDescribe("SchedulerPredicates [Serial]", func() {
 		rc, err := c.ReplicationControllers(ns).Get(RCName)
 		if err == nil && rc.Spec.Replicas != 0 {
 			By("Cleaning up the replication controller")
-			err := framework.DeleteRC(c, ns, RCName)
+			err := framework.DeleteRCAndPods(c, ns, RCName)
 			framework.ExpectNoError(err)
 		}
 	})
@ -958,7 +958,7 @@ var _ = framework.KubeDescribe("SchedulerPredicates [Serial]", func() {
 		// cannot be scheduled onto it.
 		By("Launching two pods on two distinct nodes to get two node names")
 		CreateHostPortPods(f, "host-port", 2, true)
-		defer framework.DeleteRC(f.Client, f.Namespace.Name, "host-port")
+		defer framework.DeleteRCAndPods(f.Client, f.Namespace.Name, "host-port")
 		podList, err := c.Pods(ns).List(api.ListOptions{})
 		ExpectNoError(err)
 		Expect(len(podList.Items)).To(Equal(2))
--- a/test/e2e/service.go
+++ b/test/e2e/service.go
@ -1402,7 +1402,7 @@ func startServeHostnameService(c *client.Client, ns, name string, port, replicas
 }

 func stopServeHostnameService(c *client.Client, ns, name string) error {
-	if err := framework.DeleteRC(c, ns, name); err != nil {
+	if err := framework.DeleteRCAndPods(c, ns, name); err != nil {
 		return err
 	}
 	if err := c.Services(ns).Delete(name); err != nil {
--- a/test/e2e/service_latency.go
+++ b/test/e2e/service_latency.go
@ -127,7 +127,7 @@ func runServiceLatencies(f *framework.Framework, inParallel, total int) (output
 	if err := framework.RunRC(cfg); err != nil {
 		return nil, err
 	}
-	defer framework.DeleteRC(f.Client, f.Namespace.Name, cfg.Name)
+	defer framework.DeleteRCAndPods(f.Client, f.Namespace.Name, cfg.Name)

 	// Run a single watcher, to reduce the number of API calls we have to
 	// make; this is to minimize the timing error. It's how kube-proxy
--- a/test/e2e/ubernetes_lite.go
+++ b/test/e2e/ubernetes_lite.go
@ -219,7 +219,7 @@ func SpreadRCOrFail(f *framework.Framework, replicaCount int32, image string) {
 	// Cleanup the replication controller when we are done.
 	defer func() {
 		// Resize the replication controller to zero to get rid of pods.
-		if err := framework.DeleteRC(f.Client, f.Namespace.Name, controller.Name); err != nil {
+		if err := framework.DeleteRCAndPods(f.Client, f.Namespace.Name, controller.Name); err != nil {
 			framework.Logf("Failed to cleanup replication controller %v: %v.", controller.Name, err)
 		}
 	}()