Split kubelet e2e resource usage tracking tests

Some tests in this test suite expect --max-pods (i.e., the maximum pod capacity of the kubelet) to be greater than the default, which is the case only in the GCE test environment. Split the tests into two sets so that we can better categorize them in the Jenkins setup, without making the tests themselves aware of the environment.
2015-10-01 10:46:46 -07:00 · 2015-10-01 10:46:46 -07:00 · 15cdc45b09
parent a512111c9d
commit 15cdc45b09
2 changed files with 72 additions and 52 deletions
--- a/hack/jenkins/e2e.sh
+++ b/hack/jenkins/e2e.sh
@ -112,8 +112,14 @@ GKE_REQUIRED_SKIP_TESTS=(
    "Shell"
    "Daemon\sset"
    "Deployment"
+    "experimental\sresource\susage\stracking" # Expect --max-pods=100
    )

+# Tests which cannot be run on AWS.
+AWS_REQUIRED_SKIP_TESTS=(
+    "experimental\sresource\susage\stracking" # Expect --max-pods=100
+)
+
 # The following tests are known to be flaky, and are thus run only in their own
 # -flaky- build variants.
 GCE_FLAKY_TESTS=(
@ -301,6 +307,7 @@ case ${JOB_NAME} in
          ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
          ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
          ${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
+          ${AWS_REQUIRED_SKIP_TESTS[@]:+${AWS_REQUIRED_SKIP_TESTS[@]}} \
          )"}
    : ${ENABLE_DEPLOYMENTS:=true}
    # Override AWS defaults.
@ -439,6 +446,7 @@ case ${JOB_NAME} in
          ${GKE_REQUIRED_SKIP_TESTS[@]:+${GKE_REQUIRED_SKIP_TESTS[@]}} \
          ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
          ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
+          ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
          )"}
    ;;

@ -455,6 +463,7 @@ case ${JOB_NAME} in
          ${REBOOT_SKIP_TESTS[@]:+${REBOOT_SKIP_TESTS[@]}} \
          ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
          ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
+          ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
          )"}
    ;;

@ -499,6 +508,7 @@ case ${JOB_NAME} in
          ${GKE_REQUIRED_SKIP_TESTS[@]:+${GKE_REQUIRED_SKIP_TESTS[@]}} \
          ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
          ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
+          ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
          )"}
    ;;

--- a/test/e2e/kubelet_perf.go
+++ b/test/e2e/kubelet_perf.go
@ -50,8 +50,55 @@ func logPodsOnNodes(c *client.Client, nodeNames []string) {
 	}
 }

+func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames sets.String, resourceMonitor *resourceMonitor) {
+	numNodes := nodeNames.Len()
+	totalPods := podsPerNode * numNodes
+	By(fmt.Sprintf("Creating a RC of %d pods and wait until all pods of this RC are running", totalPods))
+	rcName := fmt.Sprintf("resource%d-%s", totalPods, string(util.NewUUID()))
+
+	// TODO: Use a more realistic workload
+	Expect(RunRC(RCConfig{
+		Client:    framework.Client,
+		Name:      rcName,
+		Namespace: framework.Namespace.Name,
+		Image:     "gcr.io/google_containers/pause:go",
+		Replicas:  totalPods,
+	})).NotTo(HaveOccurred())
+
+	// Log once and flush the stats.
+	resourceMonitor.LogLatest()
+	resourceMonitor.Reset()
+
+	By("Start monitoring resource usage")
+	// Periodically dump the cpu summary until the deadline is met.
+	// Note that without calling resourceMonitor.Reset(), the stats
+	// would occupy increasingly more memory. This should be fine
+	// for the current test duration, but we should reclaim the
+	// entries if we plan to monitor longer (e.g., 8 hours).
+	deadline := time.Now().Add(monitoringTime)
+	for time.Now().Before(deadline) {
+		Logf("Still running...%v left", deadline.Sub(time.Now()))
+		time.Sleep(reportingPeriod)
+		timeLeft := deadline.Sub(time.Now())
+		Logf("Still running...%v left", timeLeft)
+		if timeLeft < reportingPeriod {
+			time.Sleep(timeLeft)
+		} else {
+			time.Sleep(reportingPeriod)
+		}
+		logPodsOnNodes(framework.Client, nodeNames.List())
+	}
+
+	By("Reporting overall resource usage")
+	logPodsOnNodes(framework.Client, nodeNames.List())
+	resourceMonitor.LogCPUSummary()
+	resourceMonitor.LogLatest()
+
+	By("Deleting the RC")
+	DeleteRC(framework.Client, framework.Namespace.Name, rcName)
+}
+
 var _ = Describe("Kubelet", func() {
-	var numNodes int
 	var nodeNames sets.String
 	framework := NewFramework("kubelet-perf")
 	var resourceMonitor *resourceMonitor
@ -59,7 +106,6 @@ var _ = Describe("Kubelet", func() {
 	BeforeEach(func() {
 		nodes, err := framework.Client.Nodes().List(labels.Everything(), fields.Everything())
 		expectNoError(err)
-		numNodes = len(nodes.Items)
 		nodeNames = sets.NewString()
 		for _, node := range nodes.Items {
 			nodeNames.Insert(node.Name)
@ -72,61 +118,25 @@ var _ = Describe("Kubelet", func() {
 		resourceMonitor.Stop()
 	})

-	Describe("resource usage tracking", func() {
-		density := []int{0, 50}
+	Describe("regular resource usage tracking", func() {
+		density := []int{0, 35}
 		for i := range density {
 			podsPerNode := density[i]
 			name := fmt.Sprintf(
 				"over %v with %d pods per node.", monitoringTime, podsPerNode)
 			It(name, func() {
-				// Skip this test for GKE.
-				// TODO: Re-activate this for GKE
-				SkipIfProviderIs("gke")
-
-				totalPods := podsPerNode * numNodes
-				By(fmt.Sprintf("Creating a RC of %d pods and wait until all pods of this RC are running", totalPods))
-				rcName := fmt.Sprintf("resource%d-%s", totalPods, string(util.NewUUID()))
-
-				// TODO: Use a more realistic workload
-				Expect(RunRC(RCConfig{
-					Client:    framework.Client,
-					Name:      rcName,
-					Namespace: framework.Namespace.Name,
-					Image:     "gcr.io/google_containers/pause:go",
-					Replicas:  totalPods,
-				})).NotTo(HaveOccurred())
-
-				// Log once and flush the stats.
-				resourceMonitor.LogLatest()
-				resourceMonitor.Reset()
-
-				By("Start monitoring resource usage")
-				// Periodically dump the cpu summary until the deadline is met.
-				// Note that without calling resourceMonitor.Reset(), the stats
-				// would occupy increasingly more memory. This should be fine
-				// for the current test duration, but we should reclaim the
-				// entries if we plan to monitor longer (e.g., 8 hours).
-				deadline := time.Now().Add(monitoringTime)
-				for time.Now().Before(deadline) {
-					Logf("Still running...%v left", deadline.Sub(time.Now()))
-					time.Sleep(reportingPeriod)
-					timeLeft := deadline.Sub(time.Now())
-					Logf("Still running...%v left", timeLeft)
-					if timeLeft < reportingPeriod {
-						time.Sleep(timeLeft)
-					} else {
-						time.Sleep(reportingPeriod)
-					}
-					logPodsOnNodes(framework.Client, nodeNames.List())
-				}
-
-				By("Reporting overall resource usage")
-				logPodsOnNodes(framework.Client, nodeNames.List())
-				resourceMonitor.LogCPUSummary()
-				resourceMonitor.LogLatest()
-
-				By("Deleting the RC")
-				DeleteRC(framework.Client, framework.Namespace.Name, rcName)
+				runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
+			})
+		}
+	})
+	Describe("experimental resource usage tracking", func() {
+		density := []int{50}
+		for i := range density {
+			podsPerNode := density[i]
+			name := fmt.Sprintf(
+				"over %v with %d pods per node.", monitoringTime, podsPerNode)
+			It(name, func() {
+				runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
 			})
 		}
 	})