Multiple MIGs in e2e cluster autoscaling tests.

Implemented support for multiple MIGs in e2e cluster autoscaling tests.
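In a multi-MIG setup the tests now receive the full set of group names as a comma-separated --node-instance-group value and record each group's initial size, so the cluster can be restored group by group after a scale-up (see restoreSizes below) instead of resizing a single hard-coded group.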
pull/6/head
Jerzy Szczepkowski 2016-05-23 14:10:40 +02:00
parent 0a6a52b19d
commit 93e5b12a06
4 changed files with 81 additions and 50 deletions

Changed file 1 of 4:

@@ -72,6 +72,19 @@ else
   NODE_INSTANCE_GROUP=""
 fi

+if [[ "${KUBERNETES_PROVIDER}" == "gce" ]]; then
+  set_num_migs
+  NODE_INSTANCE_GROUP=""
+  for ((i=1; i<=${NUM_MIGS}; i++)); do
+    if [[ $i == ${NUM_MIGS} ]]; then
+      # We assign the same MIG names as the create-nodes function from cluster/gce/util.sh.
+      NODE_INSTANCE_GROUP="${NODE_INSTANCE_GROUP}${NODE_INSTANCE_PREFIX}-group"
+    else
+      NODE_INSTANCE_GROUP="${NODE_INSTANCE_GROUP}${NODE_INSTANCE_PREFIX}-group-${i},"
+    fi
+  done
+fi
 if [[ "${KUBERNETES_PROVIDER}" == "gke" ]]; then
   detect-node-instance-group
 fi
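For clarity, here is the naming scheme this loop produces, rendered as a self-contained Go sketch (the "e2e-test" prefix and NUM_MIGS=3 are illustrative values, not part of the patch):

package main

import (
	"fmt"
	"strings"
)

// migNames mirrors the bash loop above: groups 1..n-1 get a numeric
// suffix, while the last group is plain "<prefix>-group", matching the
// names assigned by create-nodes in cluster/gce/util.sh.
func migNames(prefix string, numMigs int) string {
	names := make([]string, 0, numMigs)
	for i := 1; i < numMigs; i++ {
		names = append(names, fmt.Sprintf("%s-group-%d", prefix, i))
	}
	names = append(names, prefix+"-group")
	return strings.Join(names, ",")
}

func main() {
	fmt.Println(migNames("e2e-test", 3))
	// Output: e2e-test-group-1,e2e-test-group-2,e2e-test-group
}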

Changed file 2 of 4:

@@ -18,6 +18,7 @@ package e2e
 import (
 	"fmt"
+	"strings"
 	"time"

 	"k8s.io/kubernetes/pkg/api"
@@ -37,11 +38,12 @@ const (
 	scaleDownTimeout = 15 * time.Minute
 )

-var _ = framework.KubeDescribe("Cluster size autoscaling scale up [Feature:ClusterSizeAutoscaling] [Slow]", func() {
+var _ = framework.KubeDescribe("Cluster size autoscaling [Slow]", func() {
 	f := framework.NewDefaultFramework("autoscaling")
 	var nodeCount int
 	var coresPerNode int
 	var memCapacityMb int
+	var originalSizes map[string]int

 	BeforeEach(func() {
 		framework.SkipUnlessProviderIs("gce")
@@ -53,57 +55,54 @@ var _ = framework.KubeDescribe("Cluster size autoscaling scale up [Feature:ClusterSizeAutoscaling] [Slow]", func() {
 		mem := nodes.Items[0].Status.Capacity[api.ResourceMemory]
 		coresPerNode = int((&cpu).MilliValue() / 1000)
 		memCapacityMb = int((&mem).Value() / 1024 / 1024)
+
+		originalSizes = make(map[string]int)
+		sum := 0
+		for _, mig := range strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
+			size, err := GroupSize(mig)
+			framework.ExpectNoError(err)
+			By(fmt.Sprintf("Initial size of %s: %d", mig, size))
+			originalSizes[mig] = size
+			sum += size
+		}
+		Expect(nodeCount).Should(Equal(sum))
 	})

-	It("Should correctly handle pending pods", func() {
-		By("Too large pending pod does not increase cluster size")
+	It("shouldn't increase cluster size if pending pod is too large [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		ReserveMemory(f, "memory-reservation", 1, memCapacityMb, false)
 		// Verify that cluster size is not changed.
-		// TODO: find a better way of verifying that the cluster size will remain unchanged.
+		// TODO: find a better way of verifying that the cluster size will remain unchanged, using events.
 		time.Sleep(scaleUpTimeout)
-		framework.ExpectNoError(framework.WaitForClusterSize(f.Client, nodeCount, scaleUpTimeout))
+		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
+			func(size int) bool { return size <= nodeCount }, scaleDownTimeout))
 		framework.ExpectNoError(framework.DeleteRC(f.Client, f.Namespace.Name, "memory-reservation"))
-		framework.ExpectNoError(ResizeGroup(int32(nodeCount)))
-		framework.ExpectNoError(framework.WaitForClusterSize(f.Client, nodeCount, resizeTimeout))
+		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
+			func(size int) bool { return size <= nodeCount }, scaleDownTimeout))
+	})

-		By("Small pending pods increase cluster size")
+	It("should increase cluster size if pending pods are small [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		ReserveMemory(f, "memory-reservation", 100, nodeCount*memCapacityMb, false)
 		// Verify that cluster size is increased.
 		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
 			func(size int) bool { return size >= nodeCount+1 }, scaleUpTimeout))
 		framework.ExpectNoError(framework.DeleteRC(f.Client, f.Namespace.Name, "memory-reservation"))
-		framework.ExpectNoError(ResizeGroup(int32(nodeCount)))
-		framework.ExpectNoError(framework.WaitForClusterSize(f.Client, nodeCount, resizeTimeout))
+		restoreSizes(originalSizes)
+		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
+			func(size int) bool { return size <= nodeCount }, scaleDownTimeout))
+	})

-		By("Handling node port pods")
+	It("should increase cluster size if pods are pending due to host port conflict [Feature:ClusterSizeAutoscalingScaleUp]", func() {
 		CreateHostPortPods(f, "host-port", nodeCount+2, false)
 		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
 			func(size int) bool { return size >= nodeCount+2 }, scaleUpTimeout))
 		framework.ExpectNoError(framework.DeleteRC(f.Client, f.Namespace.Name, "host-port"))
-		framework.ExpectNoError(ResizeGroup(int32(nodeCount)))
-		framework.ExpectNoError(framework.WaitForClusterSize(f.Client, nodeCount, resizeTimeout))
+		restoreSizes(originalSizes)
+		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
+			func(size int) bool { return size <= nodeCount }, scaleDownTimeout))
 	})
-})

-var _ = framework.KubeDescribe("Cluster size autoscaling scale down[Feature:ClusterSizeAutoscalingScaleDown] [Slow]", func() {
-	f := framework.NewDefaultFramework("autoscaling")
-	var nodeCount int
-	var coresPerNode int
-	var memCapacityMb int
-
-	BeforeEach(func() {
-		framework.SkipUnlessProviderIs("gce")
-		nodes := framework.GetReadySchedulableNodesOrDie(f.Client)
-		nodeCount = len(nodes.Items)
-		Expect(nodeCount).NotTo(BeZero())
-		cpu := nodes.Items[0].Status.Capacity[api.ResourceCPU]
-		mem := nodes.Items[0].Status.Capacity[api.ResourceMemory]
-		coresPerNode = int((&cpu).MilliValue() / 1000)
-		memCapacityMb = int((&mem).Value() / 1024 / 1024)
-	})
-
-	It("Should correctly handle pending and scale down after deletion", func() {
+	It("should correctly handle pending and scale down after deletion [Feature:ClusterSizeAutoscalingScaleDown]", func() {
 		By("Small pending pods increase cluster size")
 		ReserveMemory(f, "memory-reservation", 100, nodeCount*memCapacityMb, false)
 		// Verify that cluster size is increased.
@@ -192,3 +191,16 @@ func WaitForClusterSizeFunc(c *client.Client, sizeFunc func(int) bool, timeout time.Duration) error {
 	}
 	return fmt.Errorf("timeout waiting %v for appropriate cluster size", timeout)
 }
+
+func restoreSizes(sizes map[string]int) {
+	By(fmt.Sprintf("Restoring initial size of the cluster"))
+	for mig, desiredSize := range sizes {
+		currentSize, err := GroupSize(mig)
+		framework.ExpectNoError(err)
+		if desiredSize != currentSize {
+			By(fmt.Sprintf("Setting size of %s to %d", mig, desiredSize))
+			err = ResizeGroup(mig, int32(desiredSize))
+			framework.ExpectNoError(err)
+		}
+	}
+}
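The hunk above also shows the tail of WaitForClusterSizeFunc, which the reworked tests rely on instead of fixed-size waits. A minimal, runnable sketch of that polling pattern with the Kubernetes client plumbing stripped out (the names here are ours, not the patch's):

package main

import (
	"fmt"
	"time"
)

// waitForSize polls getSize until sizeFunc accepts the reported size or
// the timeout expires -- the same loop shape as WaitForClusterSizeFunc
// and WaitForGroupSize in this commit. Errors are logged and retried.
func waitForSize(getSize func() (int, error), sizeFunc func(int) bool, timeout time.Duration) error {
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(2 * time.Second) {
		size, err := getSize()
		if err != nil {
			fmt.Printf("failed to get size: %v\n", err)
			continue
		}
		if sizeFunc(size) {
			return nil
		}
	}
	return fmt.Errorf("timeout waiting %v for appropriate cluster size", timeout)
}

func main() {
	calls := 0
	fakeSize := func() (int, error) { calls++; return calls, nil } // grows on every poll
	err := waitForSize(fakeSize, func(size int) bool { return size >= 3 }, time.Minute)
	fmt.Println(err) // <nil> once the fake cluster reaches 3 nodes
}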

Changed file 3 of 4:

@@ -113,7 +113,7 @@ func RegisterFlags() {
 	flag.StringVar(&cloudConfig.Zone, "gce-zone", "", "GCE zone being used, if applicable")
 	flag.StringVar(&cloudConfig.ServiceAccount, "gce-service-account", "", "GCE service account to use for GCE API calls, if applicable")
 	flag.StringVar(&cloudConfig.Cluster, "gke-cluster", "", "GKE name of cluster being used, if applicable")
-	flag.StringVar(&cloudConfig.NodeInstanceGroup, "node-instance-group", "", "Name of the managed instance group for nodes. Valid only for gce, gke or aws")
+	flag.StringVar(&cloudConfig.NodeInstanceGroup, "node-instance-group", "", "Name of the managed instance group for nodes. Valid only for gce, gke or aws. If there is more than one group: a comma-separated list of groups.")
 	flag.IntVar(&cloudConfig.NumNodes, "num-nodes", -1, "Number of nodes in the cluster")
 	flag.StringVar(&cloudConfig.ClusterTag, "cluster-tag", "", "Tag used to identify resources. Only required if provider is aws.")

Changed file 4 of 4:

@@ -53,7 +53,7 @@ const (
 	testPort = 9376
 )

-func ResizeGroup(size int32) error {
+func ResizeGroup(group string, size int32) error {
 	if framework.TestContext.ReportDir != "" {
 		framework.CoreDump(framework.TestContext.ReportDir)
 		defer framework.CoreDump(framework.TestContext.ReportDir)
@@ -62,7 +62,7 @@ func ResizeGroup(size int32) error {
 		// TODO: make this hit the compute API directly instead of shelling out to gcloud.
 		// TODO: make gce/gke implement InstanceGroups, so we can eliminate the per-provider logic
 		output, err := exec.Command("gcloud", "compute", "instance-groups", "managed", "resize",
-			framework.TestContext.CloudConfig.NodeInstanceGroup, fmt.Sprintf("--size=%v", size),
+			group, fmt.Sprintf("--size=%v", size),
 			"--project="+framework.TestContext.CloudConfig.ProjectID, "--zone="+framework.TestContext.CloudConfig.Zone).CombinedOutput()
 		if err != nil {
 			framework.Logf("Failed to resize node instance group: %v", string(output))
@@ -70,18 +70,18 @@ func ResizeGroup(size int32) error {
 		return err
 	} else if framework.TestContext.Provider == "aws" {
 		client := autoscaling.New(session.New())
-		return awscloud.ResizeInstanceGroup(client, framework.TestContext.CloudConfig.NodeInstanceGroup, int(size))
+		return awscloud.ResizeInstanceGroup(client, group, int(size))
 	} else {
 		return fmt.Errorf("Provider does not support InstanceGroups")
 	}
 }

-func groupSize() (int, error) {
+func GroupSize(group string) (int, error) {
 	if framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" {
 		// TODO: make this hit the compute API directly instead of shelling out to gcloud.
 		// TODO: make gce/gke implement InstanceGroups, so we can eliminate the per-provider logic
 		output, err := exec.Command("gcloud", "compute", "instance-groups", "managed",
-			"list-instances", framework.TestContext.CloudConfig.NodeInstanceGroup, "--project="+framework.TestContext.CloudConfig.ProjectID,
+			"list-instances", group, "--project="+framework.TestContext.CloudConfig.ProjectID,
 			"--zone="+framework.TestContext.CloudConfig.Zone).CombinedOutput()
 		if err != nil {
 			return -1, err
@@ -90,12 +90,12 @@ func groupSize() (int, error) {
 		return len(re.FindAllString(string(output), -1)), nil
 	} else if framework.TestContext.Provider == "aws" {
 		client := autoscaling.New(session.New())
-		instanceGroup, err := awscloud.DescribeInstanceGroup(client, framework.TestContext.CloudConfig.NodeInstanceGroup)
+		instanceGroup, err := awscloud.DescribeInstanceGroup(client, group)
 		if err != nil {
 			return -1, fmt.Errorf("error describing instance group: %v", err)
 		}
 		if instanceGroup == nil {
-			return -1, fmt.Errorf("instance group not found: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
+			return -1, fmt.Errorf("instance group not found: %s", group)
 		}
 		return instanceGroup.CurrentSize()
 	} else {
@@ -103,10 +103,10 @@ func groupSize() (int, error) {
 	}
 }

-func waitForGroupSize(size int32) error {
+func WaitForGroupSize(group string, size int32) error {
 	timeout := 10 * time.Minute
 	for start := time.Now(); time.Since(start) < timeout; time.Sleep(5 * time.Second) {
-		currentSize, err := groupSize()
+		currentSize, err := GroupSize(group)
 		if err != nil {
 			framework.Logf("Failed to get node instance group size: %v", err)
 			continue
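With the group name now an explicit argument, callers resize and verify one MIG at a time. An illustrative composition inside the e2e test package ("example-group" is a hypothetical MIG name, and this helper is ours, not part of the patch):

func resizeOneGroup() {
	group := "example-group"
	framework.ExpectNoError(ResizeGroup(group, 4))      // shells out to gcloud ... resize --size=4 on GCE/GKE
	framework.ExpectNoError(WaitForGroupSize(group, 4)) // polls GroupSize until the MIG reports 4 instances
	size, err := GroupSize(group)
	framework.ExpectNoError(err)
	framework.Logf("%s now has %d instances", group, size)
}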
@@ -347,13 +347,19 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
 	var c *client.Client
 	var ns string
 	ignoreLabels := framework.ImagePullerLabels
+	var group string

 	BeforeEach(func() {
 		c = f.Client
 		ns = f.Namespace.Name
 		systemPods, err := framework.GetPodsInNamespace(c, ns, ignoreLabels)
 		Expect(err).NotTo(HaveOccurred())
 		systemPodsNo = int32(len(systemPods))
+		if strings.Index(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") >= 0 {
+			framework.Failf("Test does not support cluster setup with more than one MIG: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
+		} else {
+			group = framework.TestContext.CloudConfig.NodeInstanceGroup
+		}
 	})

 	// Slow issue #13323 (8 min)
@@ -373,7 +379,7 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
 			}
 			By("restoring the original node instance group size")
-			if err := ResizeGroup(int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
+			if err := ResizeGroup(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
 				framework.Failf("Couldn't restore the original node instance group size: %v", err)
 			}
 			// In GKE, our current tunneling setup has the potential to hold on to a broken tunnel (from a
@@ -388,7 +394,7 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
 				By("waiting 5 minutes for all dead tunnels to be dropped")
 				time.Sleep(5 * time.Minute)
 			}
-			if err := waitForGroupSize(int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
+			if err := WaitForGroupSize(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
 				framework.Failf("Couldn't restore the original node instance group size: %v", err)
 			}
 			if err := framework.WaitForClusterSize(c, framework.TestContext.CloudConfig.NumNodes, 10*time.Minute); err != nil {
@@ -412,9 +418,9 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
 			Expect(err).NotTo(HaveOccurred())

 			By(fmt.Sprintf("decreasing cluster size to %d", replicas-1))
-			err = ResizeGroup(replicas - 1)
+			err = ResizeGroup(group, replicas-1)
 			Expect(err).NotTo(HaveOccurred())
-			err = waitForGroupSize(replicas - 1)
+			err = WaitForGroupSize(group, replicas-1)
 			Expect(err).NotTo(HaveOccurred())
 			err = framework.WaitForClusterSize(c, int(replicas-1), 10*time.Minute)
 			Expect(err).NotTo(HaveOccurred())
@@ -436,9 +442,9 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
 			Expect(err).NotTo(HaveOccurred())

 			By(fmt.Sprintf("increasing cluster size to %d", replicas+1))
-			err = ResizeGroup(replicas + 1)
+			err = ResizeGroup(group, replicas+1)
 			Expect(err).NotTo(HaveOccurred())
-			err = waitForGroupSize(replicas + 1)
+			err = WaitForGroupSize(group, replicas+1)
 			Expect(err).NotTo(HaveOccurred())
 			err = framework.WaitForClusterSize(c, int(replicas+1), 10*time.Minute)
 			Expect(err).NotTo(HaveOccurred())