diff --git a/hack/ginkgo-e2e.sh b/hack/ginkgo-e2e.sh
index 502b28fcc8..87fd77276b 100755
--- a/hack/ginkgo-e2e.sh
+++ b/hack/ginkgo-e2e.sh
@@ -72,6 +72,19 @@ else
   NODE_INSTANCE_GROUP=""
 fi
 
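+# For GCE, rebuild NODE_INSTANCE_GROUP as a comma-separated list of all node MIGs, e.g.
+# "${NODE_INSTANCE_PREFIX}-group-1,${NODE_INSTANCE_PREFIX}-group-2,${NODE_INSTANCE_PREFIX}-group" when NUM_MIGS=3.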
+if [[ "${KUBERNETES_PROVIDER}" == "gce" ]]; then
+  set_num_migs
+  NODE_INSTANCE_GROUP=""
+  for ((i=1; i<=${NUM_MIGS}; i++)); do
+    if [[ $i == ${NUM_MIGS} ]]; then
+      # We assign the same MIG names as the create-nodes function from cluster/gce/util.sh.
+      NODE_INSTANCE_GROUP="${NODE_INSTANCE_GROUP}${NODE_INSTANCE_PREFIX}-group"
+    else
+      NODE_INSTANCE_GROUP="${NODE_INSTANCE_GROUP}${NODE_INSTANCE_PREFIX}-group-${i},"
+    fi
+  done
+fi
+
 if [[ "${KUBERNETES_PROVIDER}" == "gke" ]]; then
   detect-node-instance-group
 fi
diff --git a/test/e2e/cluster_size_autoscaling.go b/test/e2e/cluster_size_autoscaling.go
index 330991f745..62e55b29d2 100644
--- a/test/e2e/cluster_size_autoscaling.go
+++ b/test/e2e/cluster_size_autoscaling.go
@@ -18,6 +18,7 @@ package e2e
 
 import (
 	"fmt"
+	"strings"
 	"time"
 
 	"k8s.io/kubernetes/pkg/api"
@@ -37,11 +38,12 @@ const (
 	scaleDownTimeout = 15 * time.Minute
 )
 
-var _ = framework.KubeDescribe("Cluster size autoscaling scale up [Feature:ClusterSizeAutoscaling] [Slow]", func() {
+var _ = framework.KubeDescribe("Cluster size autoscaling [Slow]", func() {
 	f := framework.NewDefaultFramework("autoscaling")
 	var nodeCount int
 	var coresPerNode int
 	var memCapacityMb int
+	var originalSizes map[string]int
 
 	BeforeEach(func() {
 		framework.SkipUnlessProviderIs("gce")
@@ -53,57 +55,54 @@ var _ = framework.KubeDescribe("Cluster size autoscaling scale up [Feature:Clust
 		mem := nodes.Items[0].Status.Capacity[api.ResourceMemory]
 		coresPerNode = int((&cpu).MilliValue() / 1000)
 		memCapacityMb = int((&mem).Value() / 1024 / 1024)
+
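+		// Record the initial size of every MIG so the tests can restore it later;
+		// the sizes should add up to the number of schedulable nodes.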
+		originalSizes = make(map[string]int)
+		sum := 0
+		for _, mig := range strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
+			size, err := GroupSize(mig)
+			framework.ExpectNoError(err)
+			By(fmt.Sprintf("Initial size of %s: %d", mig, size))
+			originalSizes[mig] = size
+			sum += size
+		}
+		Expect(nodeCount).Should(Equal(sum))
 	})
 
-	It("Should correctly handle pending pods", func() {
-		By("Too large pending pod does not increase cluster size")
+	It("shouldn't increase cluster size if pending pod is too large [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		ReserveMemory(f, "memory-reservation", 1, memCapacityMb, false)
 		// Verify, that cluster size is not changed.
-		// TODO: find a better way of verification that the cluster size will remain unchanged.
+		// TODO: find a better way of verifying that the cluster size will remain unchanged, e.g. using events.
 		time.Sleep(scaleUpTimeout)
-		framework.ExpectNoError(framework.WaitForClusterSize(f.Client, nodeCount, scaleUpTimeout))
+		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
+			func(size int) bool { return size <= nodeCount }, scaleDownTimeout))
 		framework.ExpectNoError(framework.DeleteRC(f.Client, f.Namespace.Name, "memory-reservation"))
-		framework.ExpectNoError(ResizeGroup(int32(nodeCount)))
-		framework.ExpectNoError(framework.WaitForClusterSize(f.Client, nodeCount, resizeTimeout))
+		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
+			func(size int) bool { return size <= nodeCount }, scaleDownTimeout))
+	})
 
-		By("Small pending pods increase cluster size")
+	It("should increase cluster size if pending pods are small [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		ReserveMemory(f, "memory-reservation", 100, nodeCount*memCapacityMb, false)
 		// Verify, that cluster size is increased
 		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
 			func(size int) bool { return size >= nodeCount+1 }, scaleUpTimeout))
 		framework.ExpectNoError(framework.DeleteRC(f.Client, f.Namespace.Name, "memory-reservation"))
-		framework.ExpectNoError(ResizeGroup(int32(nodeCount)))
-		framework.ExpectNoError(framework.WaitForClusterSize(f.Client, nodeCount, resizeTimeout))
+		restoreSizes(originalSizes)
+		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
+			func(size int) bool { return size <= nodeCount }, scaleDownTimeout))
+	})
 
-		By("Handling node port pods")
+	It("should increase cluster size if pods are pending due to host port conflict [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		CreateHostPortPods(f, "host-port", nodeCount+2, false)
 		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
 			func(size int) bool { return size >= nodeCount+2 }, scaleUpTimeout))
+		framework.ExpectNoError(framework.DeleteRC(f.Client, f.Namespace.Name, "host-port"))
+		restoreSizes(originalSizes)
+		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
+			func(size int) bool { return size <= nodeCount }, scaleDownTimeout))
 
-		framework.ExpectNoError(ResizeGroup(int32(nodeCount)))
-		framework.ExpectNoError(framework.WaitForClusterSize(f.Client, nodeCount, resizeTimeout))
-	})
-})
-
-var _ = framework.KubeDescribe("Cluster size autoscaling scale down[Feature:ClusterSizeAutoscalingScaleDown] [Slow]", func() {
-	f := framework.NewDefaultFramework("autoscaling")
-	var nodeCount int
-	var coresPerNode int
-	var memCapacityMb int
-
-	BeforeEach(func() {
-		framework.SkipUnlessProviderIs("gce")
-
-		nodes := framework.GetReadySchedulableNodesOrDie(f.Client)
-		nodeCount = len(nodes.Items)
-		Expect(nodeCount).NotTo(BeZero())
-		cpu := nodes.Items[0].Status.Capacity[api.ResourceCPU]
-		mem := nodes.Items[0].Status.Capacity[api.ResourceMemory]
-		coresPerNode = int((&cpu).MilliValue() / 1000)
-		memCapacityMb = int((&mem).Value() / 1024 / 1024)
 	})
 
-	It("Should correctly handle pending and scale down after deletion", func() {
+	It("should correctly handle pending and scale down after deletion [Feature:ClusterSizeAutoscalingScaleDown]", func() {
 		By("Small pending pods increase cluster size")
 		ReserveMemory(f, "memory-reservation", 100, nodeCount*memCapacityMb, false)
 		// Verify, that cluster size is increased
@@ -192,3 +191,16 @@ func WaitForClusterSizeFunc(c *client.Client, sizeFunc func(int) bool, timeout t
 	}
 	return fmt.Errorf("timeout waiting %v for appropriate cluster size", timeout)
 }
+
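+// restoreSizes resizes every MIG back to its recorded size; groups that are
+// already at the desired size are left untouched.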
+func restoreSizes(sizes map[string]int) {
+	By(fmt.Sprintf("Restoring initial size of the cluster"))
+	for mig, desiredSize := range sizes {
+		currentSize, err := GroupSize(mig)
+		framework.ExpectNoError(err)
+		if desiredSize != currentSize {
+			By(fmt.Sprintf("Setting size of %s to %d", mig, desiredSize))
+			err = ResizeGroup(mig, int32(desiredSize))
+			framework.ExpectNoError(err)
+		}
+	}
+}
diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go
index 128cc07c17..d8bac29c26 100644
--- a/test/e2e/framework/test_context.go
+++ b/test/e2e/framework/test_context.go
@@ -113,7 +113,7 @@ func RegisterFlags() {
 	flag.StringVar(&cloudConfig.Zone, "gce-zone", "", "GCE zone being used, if applicable")
 	flag.StringVar(&cloudConfig.ServiceAccount, "gce-service-account", "", "GCE service account to use for GCE API calls, if applicable")
 	flag.StringVar(&cloudConfig.Cluster, "gke-cluster", "", "GKE name of cluster being used, if applicable")
-	flag.StringVar(&cloudConfig.NodeInstanceGroup, "node-instance-group", "", "Name of the managed instance group for nodes. Valid only for gce, gke or aws")
+	flag.StringVar(&cloudConfig.NodeInstanceGroup, "node-instance-group", "", "Name of the managed instance group for nodes. Valid only for gce, gke or aws. If there is more than one group: a comma-separated list of groups.")
 	flag.IntVar(&cloudConfig.NumNodes, "num-nodes", -1, "Number of nodes in the cluster")
 	flag.StringVar(&cloudConfig.ClusterTag, "cluster-tag", "", "Tag used to identify resources. Only required if provider is aws.")
diff --git a/test/e2e/resize_nodes.go b/test/e2e/resize_nodes.go
index c2b25e4647..c8e6da13a1 100644
--- a/test/e2e/resize_nodes.go
+++ b/test/e2e/resize_nodes.go
@@ -53,7 +53,7 @@ const (
 	testPort = 9376
 )
 
-func ResizeGroup(size int32) error {
+func ResizeGroup(group string, size int32) error {
 	if framework.TestContext.ReportDir != "" {
 		framework.CoreDump(framework.TestContext.ReportDir)
 		defer framework.CoreDump(framework.TestContext.ReportDir)
@@ -62,7 +62,7 @@ func ResizeGroup(size int32) error {
 		// TODO: make this hit the compute API directly instead of shelling out to gcloud.
 		// TODO: make gce/gke implement InstanceGroups, so we can eliminate the per-provider logic
 		output, err := exec.Command("gcloud", "compute", "instance-groups", "managed", "resize",
-			framework.TestContext.CloudConfig.NodeInstanceGroup, fmt.Sprintf("--size=%v", size),
+			group, fmt.Sprintf("--size=%v", size),
 			"--project="+framework.TestContext.CloudConfig.ProjectID, "--zone="+framework.TestContext.CloudConfig.Zone).CombinedOutput()
 		if err != nil {
 			framework.Logf("Failed to resize node instance group: %v", string(output))
@@ -70,18 +70,18 @@ func ResizeGroup(size int32) error {
 		return err
 	} else if framework.TestContext.Provider == "aws" {
 		client := autoscaling.New(session.New())
-		return awscloud.ResizeInstanceGroup(client, framework.TestContext.CloudConfig.NodeInstanceGroup, int(size))
+		return awscloud.ResizeInstanceGroup(client, group, int(size))
 	} else {
 		return fmt.Errorf("Provider does not support InstanceGroups")
 	}
 }
 
-func groupSize() (int, error) {
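+// GroupSize returns the current number of instances in the given node instance group.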
+func GroupSize(group string) (int, error) {
 	if framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" {
 		// TODO: make this hit the compute API directly instead of shelling out to gcloud.
 		// TODO: make gce/gke implement InstanceGroups, so we can eliminate the per-provider logic
 		output, err := exec.Command("gcloud", "compute", "instance-groups", "managed",
-			"list-instances", framework.TestContext.CloudConfig.NodeInstanceGroup, "--project="+framework.TestContext.CloudConfig.ProjectID,
+			"list-instances", group, "--project="+framework.TestContext.CloudConfig.ProjectID,
 			"--zone="+framework.TestContext.CloudConfig.Zone).CombinedOutput()
 		if err != nil {
 			return -1, err
@@ -90,12 +90,12 @@ func groupSize() (int, error) {
 		return len(re.FindAllString(string(output), -1)), nil
 	} else if framework.TestContext.Provider == "aws" {
 		client := autoscaling.New(session.New())
-		instanceGroup, err := awscloud.DescribeInstanceGroup(client, framework.TestContext.CloudConfig.NodeInstanceGroup)
+		instanceGroup, err := awscloud.DescribeInstanceGroup(client, group)
 		if err != nil {
 			return -1, fmt.Errorf("error describing instance group: %v", err)
 		}
 		if instanceGroup == nil {
-			return -1, fmt.Errorf("instance group not found: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
+			return -1, fmt.Errorf("instance group not found: %s", group)
 		}
 		return instanceGroup.CurrentSize()
 	} else {
@@ -103,10 +103,10 @@ func groupSize() (int, error) {
 	}
 }
 
-func waitForGroupSize(size int32) error {
+func WaitForGroupSize(group string, size int32) error {
 	timeout := 10 * time.Minute
 	for start := time.Now(); time.Since(start) < timeout; time.Sleep(5 * time.Second) {
-		currentSize, err := groupSize()
+		currentSize, err := GroupSize(group)
 		if err != nil {
 			framework.Logf("Failed to get node instance group size: %v", err)
 			continue
@@ -347,13 +347,19 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
 	var c *client.Client
 	var ns string
 	ignoreLabels := framework.ImagePullerLabels
+	var group string
+
 	BeforeEach(func() {
 		c = f.Client
 		ns = f.Namespace.Name
 		systemPods, err := framework.GetPodsInNamespace(c, ns, ignoreLabels)
 		Expect(err).NotTo(HaveOccurred())
 		systemPodsNo = int32(len(systemPods))
-
+		if strings.Index(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") >= 0 {
+			framework.Failf("Test does not support cluster setup with more than one MIG: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
+		} else {
+			group = framework.TestContext.CloudConfig.NodeInstanceGroup
+		}
 	})
 
 	// Slow issue #13323 (8 min)
@@ -373,7 +379,7 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
 			}
 
 			By("restoring the original node instance group size")
-			if err := ResizeGroup(int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
+			if err := ResizeGroup(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
 				framework.Failf("Couldn't restore the original node instance group size: %v", err)
 			}
 			// In GKE, our current tunneling setup has the potential to hold on to a broken tunnel (from a
@@ -388,7 +394,7 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
 				By("waiting 5 minutes for all dead tunnels to be dropped")
 				time.Sleep(5 * time.Minute)
 			}
-			if err := waitForGroupSize(int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
+			if err := WaitForGroupSize(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
 				framework.Failf("Couldn't restore the original node instance group size: %v", err)
 			}
 			if err := framework.WaitForClusterSize(c, framework.TestContext.CloudConfig.NumNodes, 10*time.Minute); err != nil {
@@ -412,9 +418,9 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
 			Expect(err).NotTo(HaveOccurred())
 
 			By(fmt.Sprintf("decreasing cluster size to %d", replicas-1))
-			err = ResizeGroup(replicas - 1)
+			err = ResizeGroup(group, replicas-1)
 			Expect(err).NotTo(HaveOccurred())
-			err = waitForGroupSize(replicas - 1)
+			err = WaitForGroupSize(group, replicas-1)
 			Expect(err).NotTo(HaveOccurred())
 			err = framework.WaitForClusterSize(c, int(replicas-1), 10*time.Minute)
 			Expect(err).NotTo(HaveOccurred())
@@ -436,9 +442,9 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
 			Expect(err).NotTo(HaveOccurred())
 
 			By(fmt.Sprintf("increasing cluster size to %d", replicas+1))
-			err = ResizeGroup(replicas + 1)
+			err = ResizeGroup(group, replicas+1)
 			Expect(err).NotTo(HaveOccurred())
-			err = waitForGroupSize(replicas + 1)
+			err = WaitForGroupSize(group, replicas+1)
 			Expect(err).NotTo(HaveOccurred())
 			err = framework.WaitForClusterSize(c, int(replicas+1), 10*time.Minute)
 			Expect(err).NotTo(HaveOccurred())